From 637ea108cd439568f09bfea3f58198d38c4c2b21 Mon Sep 17 00:00:00 2001 From: CI Date: Wed, 7 May 2025 13:04:57 +0000 Subject: [PATCH] Build branch v0.3 with version v0.3.0 (e21130f) Build pipeline: viash-hub.rnaseq.v0.3-6gfl7 Source commit: https://github.com/viash-hub/rnaseq/commit/e21130ff7a4a215d28ec1988730cbc95c0259076 Source message: Bump version to v0.3.0 --- .gitignore | 5 + CHANGELOG.md | 20 + README.md | 242 + README.qmd | 158 + _viash.yaml | 24 + assets/advanced_form.png | Bin 0 -> 46759 bytes assets/global_params.png | Bin 0 -> 84073 bytes assets/launch_workflow.png | Bin 0 -> 42957 bytes assets/nextlow_execution.png | Bin 0 -> 109931 bytes assets/parameter_set_1.png | Bin 0 -> 119018 bytes assets/parameter_set_2.png | Bin 0 -> 54564 bytes assets/seqera_cloud_execution.png | Bin 0 -> 100952 bytes bin/get_minimal_test_data.sh | 105 + bin/get_unit_test_data.sh | 50 + examples/minimal.yaml | 40 + main.nf | 3 + nextflow.config | 6 + src/assets/methods_description_template.yml | 25 + src/assets/multiqc_config.yml | 164 + src/assets/optional_file.txt | 1 + src/assets/required_file.txt | 1 + src/assets/rrna-db-defaults.txt | 8 + src/bedtools_genomecov/config.vsh.yaml | 56 + src/bedtools_genomecov/script.sh | 25 + src/bedtools_genomecov/test.sh | 22 + src/cat_additional_fasta/config.vsh.yaml | 54 + src/cat_additional_fasta/script.py | 80 + src/cat_additional_fasta/test.sh | 26 + src/cat_fastq/config.vsh.yaml | 54 + src/cat_fastq/script.sh | 20 + src/cat_fastq/test.sh | 44 + src/deseq2_qc/config.vsh.yaml | 86 + src/deseq2_qc/deseq2_clustering_header.txt | 12 + src/deseq2_qc/deseq2_pca_header.txt | 11 + src/deseq2_qc/script.r | 233 + src/deseq2_qc/test.sh | 29 + src/dupradar/config.vsh.yaml | 118 + src/dupradar/dupradar.r | 154 + src/dupradar/script.sh | 28 + src/dupradar/test.sh | 51 + src/extra/copy_if_exists/config.vsh.yaml | 32 + src/extra/copy_if_exists/script.sh | 25 + src/getchromsizes/config.vsh.yaml | 57 + src/getchromsizes/script.sh | 9 + src/getchromsizes/test.sh | 16 + src/gtf2bed/config.vsh.yaml | 45 + src/gtf2bed/gtf2bed.pl | 122 + src/gtf2bed/script.sh | 5 + src/gtf2bed/test.sh | 15 + src/gtf_filter/config.vsh.yaml | 45 + src/gtf_filter/script.py | 47 + src/gtf_filter/test.sh | 16 + src/gunzip/config.vsh.yaml | 42 + src/gunzip/script.sh | 11 + src/gunzip/test.sh | 22 + .../biotypes_header.txt | 11 + src/multiqc_custom_biotype/config.vsh.yaml | 48 + src/multiqc_custom_biotype/script.py | 91 + src/multiqc_custom_biotype/test.sh | 36 + src/picard_markduplicates/config.vsh.yaml | 69 + src/picard_markduplicates/script.sh | 17 + src/picard_markduplicates/test.sh | 19 + src/prepare_multiqc_input/config.vsh.yaml | 149 + src/prepare_multiqc_input/script.sh | 89 + .../config.vsh.yaml | 40 + src/preprocess_transcripts_fasta/script.sh | 11 + src/preprocess_transcripts_fasta/test.sh | 14 + src/preseq_lcextrap/config.vsh.yaml | 68 + src/preseq_lcextrap/script.sh | 29 + src/preseq_lcextrap/test.sh | 28 + src/rsem_merge_counts/config.vsh.yaml | 60 + src/rsem_merge_counts/script.sh | 28 + .../rseqc_junctionannotation/config.vsh.yaml | 108 + src/rseqc/rseqc_junctionannotation/script.sh | 20 + src/rseqc/rseqc_junctionannotation/test.sh | 48 + .../rseqc_junctionsaturation/config.vsh.yaml | 105 + src/rseqc/rseqc_junctionsaturation/script.sh | 19 + src/rseqc/rseqc_junctionsaturation/test.sh | 30 + .../rseqc_readdistribution/config.vsh.yaml | 52 + src/rseqc/rseqc_readdistribution/script.sh | 8 + src/rseqc/rseqc_readdistribution/test.sh | 24 + .../rseqc_readduplication/config.vsh.yaml | 82 + src/rseqc/rseqc_readduplication/script.sh | 16 + src/rseqc/rseqc_readduplication/test.sh | 25 + src/rseqc/tin/config.vsh.yaml | 85 + src/rseqc/tin/script.sh | 29 + src/rseqc/tin/test.sh | 33 + src/sortmerna/config.vsh.yaml | 65 + src/sortmerna/script.sh | 42 + src/sortmerna/test.sh | 41 + src/stringtie/config.vsh.yaml | 70 + src/stringtie/script.sh | 25 + src/stringtie/test.sh | 26 + src/summarizedexperiment/config.vsh.yaml | 48 + src/summarizedexperiment/script.sh | 18 + .../summarizedexperiment.r | 68 + src/tx2gene/config.vsh.yaml | 55 + src/tx2gene/script.sh | 24 + src/tx2gene/test.sh | 52 + src/tx2gene/tx2gene.py | 169 + src/tximport/config.vsh.yaml | 79 + src/tximport/script.sh | 23 + src/tximport/test.sh | 59 + src/tximport/tximport.r | 142 + src/ucsc/bedclip/config.vsh.yaml | 51 + src/ucsc/bedclip/script.sh | 5 + src/ucsc/bedclip/test.sh | 15 + src/ucsc/bedgraphtobigwig/config.vsh.yaml | 51 + src/ucsc/bedgraphtobigwig/script.sh | 5 + src/ucsc/bedgraphtobigwig/test.sh | 15 + .../config.vsh.yaml | 204 + .../genome_alignment_and_quant/main.nf | 431 + .../genome_alignment_and_quant/test_run.sh | 44 + .../merge_quant_results/config.vsh.yaml | 84 + src/workflows/merge_quant_results/main.nf | 67 + src/workflows/merge_quant_results/test_run.sh | 0 src/workflows/post_processing/config.vsh.yaml | 157 + src/workflows/post_processing/main.nf | 192 + src/workflows/post_processing/test_run.sh | 19 + src/workflows/pre_processing/config.vsh.yaml | 263 + src/workflows/pre_processing/main.nf | 286 + src/workflows/pre_processing/test_run.sh | 57 + src/workflows/prepare_genome/config.vsh.yaml | 168 + src/workflows/prepare_genome/main.nf | 311 + src/workflows/prepare_genome/test_run.sh | 50 + .../config.vsh.yaml | 88 + .../pseudo_alignment_and_quant/main.nf | 104 + .../pseudo_alignment_and_quant/test_run.sh | 47 + src/workflows/quality_control/config.vsh.yaml | 651 ++ src/workflows/quality_control/main.nf | 825 ++ src/workflows/quality_control/test_run.sh | 0 src/workflows/rnaseq/config.vsh.yaml | 914 +++ src/workflows/rnaseq/main.nf | 668 ++ src/workflows/rnaseq/nextflow.config | 14 + src/workflows/rnaseq/test.nf | 53 + src/workflows/rnaseq/test_run.sh | 69 + src/workflows/utils/labels.config | 44 + target/.build.yaml | 0 .../bbmap/bbmap_bbsplit/.config.vsh.yaml | 409 + .../nextflow/bbmap/bbmap_bbsplit/main.nf | 4130 ++++++++++ .../bbmap/bbmap_bbsplit/nextflow.config | 125 + .../bbmap/bbmap_bbsplit/nextflow_schema.json | 321 + .../bedtools_genomecov/.config.vsh.yaml | 378 + .../bedtools/bedtools_genomecov/main.nf | 4078 ++++++++++ .../bedtools_genomecov/nextflow.config | 126 + .../bedtools_genomecov/nextflow_schema.json | 303 + .../v0.3.1/nextflow/fastp/.config.vsh.yaml | 1124 +++ .../vsh/biobox/v0.3.1/nextflow/fastp/main.nf | 5007 ++++++++++++ .../v0.3.1/nextflow/fastp/nextflow.config | 126 + .../nextflow/fastp/nextflow_schema.json | 1131 +++ .../v0.3.1/nextflow/fastqc/.config.vsh.yaml | 381 + .../vsh/biobox/v0.3.1/nextflow/fastqc/main.nf | 4079 ++++++++++ .../v0.3.1/nextflow/fastqc/nextflow.config | 126 + .../nextflow/fastqc/nextflow_schema.json | 257 + .../nextflow/featurecounts/.config.vsh.yaml | 686 ++ .../v0.3.1/nextflow/featurecounts/main.nf | 4490 +++++++++++ .../nextflow/featurecounts/nextflow.config | 126 + .../featurecounts/nextflow_schema.json | 726 ++ .../nextflow/fq_subsample/.config.vsh.yaml | 231 + .../v0.3.1/nextflow/fq_subsample/main.nf | 3860 +++++++++ .../nextflow/fq_subsample/nextflow.config | 125 + .../fq_subsample/nextflow_schema.json | 160 + .../v0.3.1/nextflow/gffread/.config.vsh.yaml | 726 ++ .../biobox/v0.3.1/nextflow/gffread/main.nf | 4593 +++++++++++ .../v0.3.1/nextflow/gffread/nextflow.config | 126 + .../nextflow/gffread/nextflow_schema.json | 816 ++ .../kallisto/kallisto_index/.config.vsh.yaml | 259 + .../nextflow/kallisto/kallisto_index/main.nf | 3910 +++++++++ .../kallisto/kallisto_index/nextflow.config | 125 + .../kallisto_index/nextflow_schema.json | 192 + .../kallisto/kallisto_quant/.config.vsh.yaml | 287 + .../nextflow/kallisto/kallisto_quant/main.nf | 3958 +++++++++ .../kallisto/kallisto_quant/nextflow.config | 125 + .../kallisto_quant/nextflow_schema.json | 225 + .../v0.3.1/nextflow/multiqc/.config.vsh.yaml | 497 ++ .../biobox/v0.3.1/nextflow/multiqc/main.nf | 4336 ++++++++++ .../v0.3.1/nextflow/multiqc/nextflow.config | 126 + .../nextflow/multiqc/nextflow_schema.json | 529 ++ .../qualimap/qualimap_rnaseq/.config.vsh.yaml | 305 + .../nextflow/qualimap/qualimap_rnaseq/main.nf | 3989 ++++++++++ .../qualimap/qualimap_rnaseq/nextflow.config | 126 + .../qualimap_rnaseq/nextflow_schema.json | 217 + .../.config.vsh.yaml | 893 +++ .../rsem/rsem_calculate_expression/main.nf | 4622 +++++++++++ .../rsem_calculate_expression/nextflow.config | 125 + .../nextflow_schema.json | 839 ++ .../rsem_prepare_reference/.config.vsh.yaml | 457 ++ .../rsem/rsem_prepare_reference/main.nf | 4106 ++++++++++ .../rsem_prepare_reference/nextflow.config | 126 + .../nextflow_schema.json | 321 + .../rseqc/rseqc_bamstat/.config.vsh.yaml | 216 + .../nextflow/rseqc/rseqc_bamstat/main.nf | 3846 +++++++++ .../rseqc/rseqc_bamstat/nextflow.config | 126 + .../rseqc/rseqc_bamstat/nextflow_schema.json | 105 + .../rseqc_inferexperiment/.config.vsh.yaml | 242 + .../rseqc/rseqc_inferexperiment/main.nf | 3882 +++++++++ .../rseqc_inferexperiment/nextflow.config | 126 + .../nextflow_schema.json | 139 + .../rseqc_inner_distance/.config.vsh.yaml | 335 + .../rseqc/rseqc_inner_distance/main.nf | 4005 ++++++++++ .../rseqc_inner_distance/nextflow.config | 126 + .../rseqc_inner_distance/nextflow_schema.json | 209 + .../salmon/salmon_index/.config.vsh.yaml | 318 + .../nextflow/salmon/salmon_index/main.nf | 3994 ++++++++++ .../salmon/salmon_index/nextflow.config | 126 + .../salmon/salmon_index/nextflow_schema.json | 211 + .../salmon/salmon_quant/.config.vsh.yaml | 1214 +++ .../nextflow/salmon/salmon_quant/main.nf | 5009 ++++++++++++ .../salmon/salmon_quant/nextflow.config | 126 + .../salmon/salmon_quant/nextflow_schema.json | 1119 +++ .../samtools_flagstat/.config.vsh.yaml | 214 + .../samtools/samtools_flagstat/main.nf | 3838 +++++++++ .../samtools_flagstat/nextflow.config | 126 + .../samtools_flagstat/nextflow_schema.json | 105 + .../samtools_idxstats/.config.vsh.yaml | 224 + .../samtools/samtools_idxstats/main.nf | 3848 +++++++++ .../samtools_idxstats/nextflow.config | 126 + .../samtools_idxstats/nextflow_schema.json | 115 + .../samtools/samtools_index/.config.vsh.yaml | 230 + .../nextflow/samtools/samtools_index/main.nf | 3872 +++++++++ .../samtools/samtools_index/nextflow.config | 126 + .../samtools_index/nextflow_schema.json | 141 + .../samtools/samtools_sort/.config.vsh.yaml | 373 + .../nextflow/samtools/samtools_sort/main.nf | 4092 ++++++++++ .../samtools/samtools_sort/nextflow.config | 126 + .../samtools_sort/nextflow_schema.json | 309 + .../samtools/samtools_stats/.config.vsh.yaml | 442 ++ .../nextflow/samtools/samtools_stats/main.nf | 4154 ++++++++++ .../samtools/samtools_stats/nextflow.config | 126 + .../samtools_stats/nextflow_schema.json | 319 + .../nextflow/sortmerna/.config.vsh.yaml | 635 ++ .../biobox/v0.3.1/nextflow/sortmerna/main.nf | 4404 +++++++++++ .../v0.3.1/nextflow/sortmerna/nextflow.config | 125 + .../nextflow/sortmerna/nextflow_schema.json | 614 ++ .../star/star_align_reads/.config.vsh.yaml | 2704 +++++++ .../nextflow/star/star_align_reads/main.nf | 7046 +++++++++++++++++ .../star/star_align_reads/nextflow.config | 126 + .../star_align_reads/nextflow_schema.json | 2314 ++++++ .../star_genome_generate/.config.vsh.yaml | 374 + .../star/star_genome_generate/main.nf | 4037 ++++++++++ .../star/star_genome_generate/nextflow.config | 126 + .../star_genome_generate/nextflow_schema.json | 245 + .../nextflow/trimgalore/.config.vsh.yaml | 811 ++ .../biobox/v0.3.1/nextflow/trimgalore/main.nf | 4539 +++++++++++ .../nextflow/trimgalore/nextflow.config | 126 + .../nextflow/trimgalore/nextflow_schema.json | 711 ++ .../umi_tools_dedup/.config.vsh.yaml | 652 ++ .../umi_tools/umi_tools_dedup/main.nf | 4409 +++++++++++ .../umi_tools/umi_tools_dedup/nextflow.config | 126 + .../umi_tools_dedup/nextflow_schema.json | 635 ++ .../umi_tools_extract/.config.vsh.yaml | 475 ++ .../umi_tools/umi_tools_extract/main.nf | 4210 ++++++++++ .../umi_tools_extract/nextflow.config | 125 + .../umi_tools_extract/nextflow_schema.json | 430 + .../umi_tools_prepareforrsem/.config.vsh.yaml | 297 + .../umi_tools_prepareforrsem/main.nf | 3963 +++++++++ .../umi_tools_prepareforrsem/nextflow.config | 125 + .../nextflow_schema.json | 234 + .../prepare-for-rsem.py | 271 + .../v0.1.0/nextflow/untar/.config.vsh.yaml | 173 + .../craftbox/v0.1.0/nextflow/untar/main.nf | 3576 +++++++++ .../v0.1.0/nextflow/untar/nextflow.config | 125 + .../nextflow/untar/nextflow_schema.json | 119 + .../bedtools_genomecov/.config.vsh.yaml | 218 + .../bedtools_genomecov/bedtools_genomecov | 1206 +++ .../bedtools_genomecov/nextflow_labels.config | 44 + .../cat_additional_fasta/.config.vsh.yaml | 222 + .../cat_additional_fasta/cat_additional_fasta | 1268 +++ .../nextflow_labels.config | 44 + target/executable/cat_fastq/.config.vsh.yaml | 209 + target/executable/cat_fastq/cat_fastq | 1233 +++ .../cat_fastq/nextflow_labels.config | 44 + .../copy_if_exists/.config.vsh.yaml | 183 + .../executable/copy_if_exists/copy_if_exists | 1136 +++ .../copy_if_exists/nextflow_labels.config | 44 + .../copy_if_exists/optional_file.txt | 1 + .../copy_if_exists/required_file.txt | 1 + target/executable/deseq2_qc/.config.vsh.yaml | 287 + .../deseq2_qc/deseq2_clustering_header.txt | 12 + .../deseq2_qc/deseq2_pca_header.txt | 11 + target/executable/deseq2_qc/deseq2_qc | 1540 ++++ .../deseq2_qc/nextflow_labels.config | 44 + target/executable/dupradar/.config.vsh.yaml | 307 + target/executable/dupradar/dupradar | 1444 ++++ target/executable/dupradar/dupradar.r | 154 + .../dupradar/nextflow_labels.config | 44 + .../executable/getchromsizes/.config.vsh.yaml | 207 + target/executable/getchromsizes/getchromsizes | 1168 +++ .../getchromsizes/nextflow_labels.config | 44 + target/executable/gtf2bed/.config.vsh.yaml | 185 + target/executable/gtf2bed/gtf2bed | 1104 +++ target/executable/gtf2bed/gtf2bed.pl | 122 + .../executable/gtf2bed/nextflow_labels.config | 44 + target/executable/gtf_filter/.config.vsh.yaml | 195 + target/executable/gtf_filter/gtf_filter | 1189 +++ .../gtf_filter/nextflow_labels.config | 44 + target/executable/gunzip/.config.vsh.yaml | 184 + target/executable/gunzip/gunzip | 1110 +++ .../executable/gunzip/nextflow_labels.config | 44 + .../multiqc_custom_biotype/.config.vsh.yaml | 205 + .../biotypes_header.txt | 11 + .../multiqc_custom_biotype | 1264 +++ .../nextflow_labels.config | 44 + .../picard_markduplicates/.config.vsh.yaml | 247 + .../nextflow_labels.config | 44 + .../picard_markduplicates | 1258 +++ .../prepare_multiqc_input/.config.vsh.yaml | 452 ++ .../prepare_multiqc_input/multiqc_config.yml | 164 + .../nextflow_labels.config | 44 + .../prepare_multiqc_input | 2755 +++++++ .../.config.vsh.yaml | 178 + .../nextflow_labels.config | 44 + .../preprocess_transcripts_fasta | 1106 +++ .../preseq_lcextrap/.config.vsh.yaml | 231 + .../preseq_lcextrap/nextflow_labels.config | 44 + .../preseq_lcextrap/preseq_lcextrap | 1184 +++ .../rsem_merge_counts/.config.vsh.yaml | 221 + .../rsem_merge_counts/nextflow_labels.config | 44 + .../rsem_merge_counts/rsem_merge_counts | 1254 +++ .../rseqc_junctionannotation/.config.vsh.yaml | 300 + .../nextflow_labels.config | 44 + .../rseqc_junctionannotation | 1430 ++++ .../rseqc_junctionsaturation/.config.vsh.yaml | 289 + .../nextflow_labels.config | 44 + .../rseqc_junctionsaturation | 1394 ++++ .../rseqc_readdistribution/.config.vsh.yaml | 202 + .../nextflow_labels.config | 44 + .../rseqc_readdistribution | 1143 +++ .../rseqc_readduplication/.config.vsh.yaml | 251 + .../nextflow_labels.config | 44 + .../rseqc_readduplication | 1289 +++ .../rseqc/rseqc_tin/.config.vsh.yaml | 254 + .../rseqc/rseqc_tin/nextflow_labels.config | 44 + target/executable/rseqc/rseqc_tin/rseqc_tin | 1313 +++ target/executable/sortmerna/.config.vsh.yaml | 232 + .../sortmerna/nextflow_labels.config | 44 + target/executable/sortmerna/sortmerna | 1313 +++ target/executable/stringtie/.config.vsh.yaml | 256 + .../stringtie/nextflow_labels.config | 44 + target/executable/stringtie/stringtie | 1306 +++ .../summarizedexperiment/.config.vsh.yaml | 235 + .../nextflow_labels.config | 44 + .../summarizedexperiment/summarizedexperiment | 1269 +++ .../summarizedexperiment.r | 68 + target/executable/tx2gene/.config.vsh.yaml | 225 + .../executable/tx2gene/nextflow_labels.config | 44 + target/executable/tx2gene/tx2gene | 1246 +++ target/executable/tx2gene/tx2gene.py | 169 + target/executable/tximport/.config.vsh.yaml | 292 + .../tximport/nextflow_labels.config | 44 + target/executable/tximport/tximport | 1450 ++++ target/executable/tximport/tximport.r | 142 + .../executable/ucsc/bedclip/.config.vsh.yaml | 204 + target/executable/ucsc/bedclip/bedclip | 1131 +++ .../ucsc/bedclip/nextflow_labels.config | 44 + .../ucsc/bedgraphtobigwig/.config.vsh.yaml | 204 + .../ucsc/bedgraphtobigwig/bedgraphtobigwig | 1131 +++ .../bedgraphtobigwig/nextflow_labels.config | 44 + .../.config.vsh.yaml | 641 ++ .../genome_alignment_and_quant | 1851 +++++ .../nextflow_labels.config | 44 + .../merge_quant_results/.config.vsh.yaml | 322 + .../merge_quant_results/merge_quant_results | 905 +++ .../nextflow_labels.config | 44 + .../post_processing/.config.vsh.yaml | 528 ++ .../post_processing/nextflow_labels.config | 44 + .../workflows/post_processing/post_processing | 1421 ++++ .../workflows/pre_processing/.config.vsh.yaml | 681 ++ .../pre_processing/nextflow_labels.config | 44 + .../workflows/pre_processing/pre_processing | 1779 +++++ .../workflows/prepare_genome/.config.vsh.yaml | 556 ++ .../prepare_genome/nextflow_labels.config | 44 + .../workflows/prepare_genome/prepare_genome | 1550 ++++ .../.config.vsh.yaml | 328 + .../nextflow_labels.config | 44 + .../pseudo_alignment_and_quant | 946 +++ .../quality_control/.config.vsh.yaml | 1615 ++++ .../quality_control/nextflow_labels.config | 44 + .../workflows/quality_control/quality_control | 4345 ++++++++++ .../workflows/rnaseq/.config.vsh.yaml | 2102 +++++ .../workflows/rnaseq/nextflow_labels.config | 44 + target/executable/workflows/rnaseq/rnaseq | 5392 +++++++++++++ .../bedtools_genomecov/.config.vsh.yaml | 218 + target/nextflow/bedtools_genomecov/main.nf | 3876 +++++++++ .../bedtools_genomecov/nextflow.config | 125 + .../bedtools_genomecov/nextflow_labels.config | 44 + .../bedtools_genomecov/nextflow_schema.json | 129 + .../cat_additional_fasta/.config.vsh.yaml | 222 + target/nextflow/cat_additional_fasta/main.nf | 3927 +++++++++ .../cat_additional_fasta/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../cat_additional_fasta/nextflow_schema.json | 136 + target/nextflow/cat_fastq/.config.vsh.yaml | 209 + target/nextflow/cat_fastq/main.nf | 3862 +++++++++ target/nextflow/cat_fastq/nextflow.config | 125 + .../nextflow/cat_fastq/nextflow_labels.config | 44 + .../nextflow/cat_fastq/nextflow_schema.json | 116 + .../nextflow/copy_if_exists/.config.vsh.yaml | 183 + target/nextflow/copy_if_exists/main.nf | 3829 +++++++++ .../nextflow/copy_if_exists/nextflow.config | 124 + .../copy_if_exists/nextflow_labels.config | 44 + .../copy_if_exists/nextflow_schema.json | 105 + .../nextflow/copy_if_exists/optional_file.txt | 1 + .../nextflow/copy_if_exists/required_file.txt | 1 + target/nextflow/deseq2_qc/.config.vsh.yaml | 287 + .../deseq2_qc/deseq2_clustering_header.txt | 12 + .../nextflow/deseq2_qc/deseq2_pca_header.txt | 11 + target/nextflow/deseq2_qc/main.nf | 4173 ++++++++++ target/nextflow/deseq2_qc/nextflow.config | 125 + .../nextflow/deseq2_qc/nextflow_labels.config | 44 + .../nextflow/deseq2_qc/nextflow_schema.json | 182 + target/nextflow/dupradar/.config.vsh.yaml | 307 + target/nextflow/dupradar/dupradar.r | 154 + target/nextflow/dupradar/main.nf | 3992 ++++++++++ target/nextflow/dupradar/nextflow.config | 125 + .../nextflow/dupradar/nextflow_labels.config | 44 + target/nextflow/dupradar/nextflow_schema.json | 203 + .../nextflow/getchromsizes/.config.vsh.yaml | 207 + target/nextflow/getchromsizes/main.nf | 3841 +++++++++ target/nextflow/getchromsizes/nextflow.config | 125 + .../getchromsizes/nextflow_labels.config | 44 + .../getchromsizes/nextflow_schema.json | 117 + target/nextflow/gtf2bed/.config.vsh.yaml | 185 + target/nextflow/gtf2bed/gtf2bed.pl | 122 + target/nextflow/gtf2bed/main.nf | 3817 +++++++++ target/nextflow/gtf2bed/nextflow.config | 125 + .../nextflow/gtf2bed/nextflow_labels.config | 44 + target/nextflow/gtf2bed/nextflow_schema.json | 95 + target/nextflow/gtf_filter/.config.vsh.yaml | 195 + target/nextflow/gtf_filter/main.nf | 3876 +++++++++ target/nextflow/gtf_filter/nextflow.config | 125 + .../gtf_filter/nextflow_labels.config | 44 + .../nextflow/gtf_filter/nextflow_schema.json | 116 + target/nextflow/gunzip/.config.vsh.yaml | 184 + target/nextflow/gunzip/main.nf | 3820 +++++++++ target/nextflow/gunzip/nextflow.config | 125 + target/nextflow/gunzip/nextflow_labels.config | 44 + target/nextflow/gunzip/nextflow_schema.json | 95 + .../multiqc_custom_biotype/.config.vsh.yaml | 205 + .../biotypes_header.txt | 11 + .../nextflow/multiqc_custom_biotype/main.nf | 3930 +++++++++ .../multiqc_custom_biotype/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 128 + .../picard_markduplicates/.config.vsh.yaml | 247 + target/nextflow/picard_markduplicates/main.nf | 3902 +++++++++ .../picard_markduplicates/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 148 + .../prepare_multiqc_input/.config.vsh.yaml | 452 ++ target/nextflow/prepare_multiqc_input/main.nf | 4220 ++++++++++ .../prepare_multiqc_input/multiqc_config.yml | 164 + .../prepare_multiqc_input/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 415 + .../.config.vsh.yaml | 178 + .../preprocess_transcripts_fasta/main.nf | 3810 +++++++++ .../nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 95 + .../nextflow/preseq_lcextrap/.config.vsh.yaml | 231 + target/nextflow/preseq_lcextrap/main.nf | 3883 +++++++++ .../nextflow/preseq_lcextrap/nextflow.config | 125 + .../preseq_lcextrap/nextflow_labels.config | 44 + .../preseq_lcextrap/nextflow_schema.json | 115 + .../rsem_merge_counts/.config.vsh.yaml | 221 + target/nextflow/rsem_merge_counts/main.nf | 3876 +++++++++ .../rsem_merge_counts/nextflow.config | 125 + .../rsem_merge_counts/nextflow_labels.config | 44 + .../rsem_merge_counts/nextflow_schema.json | 138 + .../rseqc_junctionannotation/.config.vsh.yaml | 300 + .../rseqc/rseqc_junctionannotation/main.nf | 3974 ++++++++++ .../rseqc_junctionannotation/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 193 + .../rseqc_junctionsaturation/.config.vsh.yaml | 289 + .../rseqc/rseqc_junctionsaturation/main.nf | 3957 +++++++++ .../rseqc_junctionsaturation/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 182 + .../rseqc_readdistribution/.config.vsh.yaml | 202 + .../rseqc/rseqc_readdistribution/main.nf | 3843 +++++++++ .../rseqc_readdistribution/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 105 + .../rseqc_readduplication/.config.vsh.yaml | 251 + .../rseqc/rseqc_readduplication/main.nf | 3909 +++++++++ .../rseqc_readduplication/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 150 + .../nextflow/rseqc/rseqc_tin/.config.vsh.yaml | 254 + target/nextflow/rseqc/rseqc_tin/main.nf | 3928 +++++++++ .../nextflow/rseqc/rseqc_tin/nextflow.config | 125 + .../rseqc/rseqc_tin/nextflow_labels.config | 44 + .../rseqc/rseqc_tin/nextflow_schema.json | 159 + target/nextflow/sortmerna/.config.vsh.yaml | 232 + target/nextflow/sortmerna/main.nf | 3904 +++++++++ target/nextflow/sortmerna/nextflow.config | 125 + .../nextflow/sortmerna/nextflow_labels.config | 44 + .../nextflow/sortmerna/nextflow_schema.json | 137 + target/nextflow/stringtie/.config.vsh.yaml | 256 + target/nextflow/stringtie/main.nf | 3922 +++++++++ target/nextflow/stringtie/nextflow.config | 125 + .../nextflow/stringtie/nextflow_labels.config | 44 + .../nextflow/stringtie/nextflow_schema.json | 168 + .../summarizedexperiment/.config.vsh.yaml | 235 + target/nextflow/summarizedexperiment/main.nf | 3888 +++++++++ .../summarizedexperiment/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../summarizedexperiment/nextflow_schema.json | 155 + .../summarizedexperiment.r | 68 + target/nextflow/tx2gene/.config.vsh.yaml | 225 + target/nextflow/tx2gene/main.nf | 3888 +++++++++ target/nextflow/tx2gene/nextflow.config | 125 + .../nextflow/tx2gene/nextflow_labels.config | 44 + target/nextflow/tx2gene/nextflow_schema.json | 139 + target/nextflow/tx2gene/tx2gene.py | 169 + target/nextflow/tximport/.config.vsh.yaml | 292 + target/nextflow/tximport/main.nf | 3971 ++++++++++ target/nextflow/tximport/nextflow.config | 125 + .../nextflow/tximport/nextflow_labels.config | 44 + target/nextflow/tximport/nextflow_schema.json | 194 + target/nextflow/tximport/tximport.r | 142 + target/nextflow/ucsc/bedclip/.config.vsh.yaml | 204 + target/nextflow/ucsc/bedclip/main.nf | 3841 +++++++++ target/nextflow/ucsc/bedclip/nextflow.config | 125 + .../ucsc/bedclip/nextflow_labels.config | 44 + .../ucsc/bedclip/nextflow_schema.json | 105 + .../ucsc/bedgraphtobigwig/.config.vsh.yaml | 204 + target/nextflow/ucsc/bedgraphtobigwig/main.nf | 3841 +++++++++ .../ucsc/bedgraphtobigwig/nextflow.config | 125 + .../bedgraphtobigwig/nextflow_labels.config | 44 + .../bedgraphtobigwig/nextflow_schema.json | 105 + .../.config.vsh.yaml | 641 ++ .../genome_alignment_and_quant/main.nf | 4378 ++++++++++ .../nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 506 ++ .../merge_quant_results/.config.vsh.yaml | 322 + .../workflows/merge_quant_results/main.nf | 3640 +++++++++ .../merge_quant_results/nextflow.config | 125 + .../nextflow_labels.config | 44 + .../merge_quant_results/nextflow_schema.json | 248 + .../post_processing/.config.vsh.yaml | 528 ++ .../workflows/post_processing/main.nf | 4010 ++++++++++ .../workflows/post_processing/nextflow.config | 125 + .../post_processing/nextflow_labels.config | 44 + .../post_processing/nextflow_schema.json | 423 + .../workflows/pre_processing/.config.vsh.yaml | 681 ++ .../nextflow/workflows/pre_processing/main.nf | 4291 ++++++++++ .../workflows/pre_processing/nextflow.config | 125 + .../pre_processing/nextflow_labels.config | 44 + .../pre_processing/nextflow_schema.json | 626 ++ .../workflows/prepare_genome/.config.vsh.yaml | 556 ++ .../nextflow/workflows/prepare_genome/main.nf | 4162 ++++++++++ .../workflows/prepare_genome/nextflow.config | 125 + .../prepare_genome/nextflow_labels.config | 44 + .../prepare_genome/nextflow_schema.json | 423 + .../.config.vsh.yaml | 328 + .../pseudo_alignment_and_quant/main.nf | 3672 +++++++++ .../nextflow.config | 125 + .../nextflow_labels.config | 44 + .../nextflow_schema.json | 244 + .../quality_control/.config.vsh.yaml | 1615 ++++ .../workflows/quality_control/main.nf | 5891 ++++++++++++++ .../workflows/quality_control/nextflow.config | 125 + .../quality_control/nextflow_labels.config | 44 + .../quality_control/nextflow_schema.json | 1510 ++++ .../workflows/rnaseq/.config.vsh.yaml | 2102 +++++ target/nextflow/workflows/rnaseq/main.nf | 6315 +++++++++++++++ .../nextflow/workflows/rnaseq/nextflow.config | 125 + .../workflows/rnaseq/nextflow_labels.config | 44 + .../workflows/rnaseq/nextflow_schema.json | 2144 +++++ 573 files changed, 424840 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 README.md create mode 100644 README.qmd create mode 100644 _viash.yaml create mode 100644 assets/advanced_form.png create mode 100644 assets/global_params.png create mode 100644 assets/launch_workflow.png create mode 100644 assets/nextlow_execution.png create mode 100644 assets/parameter_set_1.png create mode 100644 assets/parameter_set_2.png create mode 100644 assets/seqera_cloud_execution.png create mode 100755 bin/get_minimal_test_data.sh create mode 100755 bin/get_unit_test_data.sh create mode 100644 examples/minimal.yaml create mode 100644 main.nf create mode 100644 nextflow.config create mode 100644 src/assets/methods_description_template.yml create mode 100644 src/assets/multiqc_config.yml create mode 100644 src/assets/optional_file.txt create mode 100644 src/assets/required_file.txt create mode 100644 src/assets/rrna-db-defaults.txt create mode 100644 src/bedtools_genomecov/config.vsh.yaml create mode 100644 src/bedtools_genomecov/script.sh create mode 100644 src/bedtools_genomecov/test.sh create mode 100644 src/cat_additional_fasta/config.vsh.yaml create mode 100644 src/cat_additional_fasta/script.py create mode 100644 src/cat_additional_fasta/test.sh create mode 100644 src/cat_fastq/config.vsh.yaml create mode 100644 src/cat_fastq/script.sh create mode 100644 src/cat_fastq/test.sh create mode 100644 src/deseq2_qc/config.vsh.yaml create mode 100644 src/deseq2_qc/deseq2_clustering_header.txt create mode 100644 src/deseq2_qc/deseq2_pca_header.txt create mode 100755 src/deseq2_qc/script.r create mode 100644 src/deseq2_qc/test.sh create mode 100644 src/dupradar/config.vsh.yaml create mode 100644 src/dupradar/dupradar.r create mode 100644 src/dupradar/script.sh create mode 100644 src/dupradar/test.sh create mode 100644 src/extra/copy_if_exists/config.vsh.yaml create mode 100644 src/extra/copy_if_exists/script.sh create mode 100644 src/getchromsizes/config.vsh.yaml create mode 100755 src/getchromsizes/script.sh create mode 100644 src/getchromsizes/test.sh create mode 100644 src/gtf2bed/config.vsh.yaml create mode 100755 src/gtf2bed/gtf2bed.pl create mode 100755 src/gtf2bed/script.sh create mode 100644 src/gtf2bed/test.sh create mode 100644 src/gtf_filter/config.vsh.yaml create mode 100644 src/gtf_filter/script.py create mode 100644 src/gtf_filter/test.sh create mode 100644 src/gunzip/config.vsh.yaml create mode 100755 src/gunzip/script.sh create mode 100644 src/gunzip/test.sh create mode 100644 src/multiqc_custom_biotype/biotypes_header.txt create mode 100644 src/multiqc_custom_biotype/config.vsh.yaml create mode 100755 src/multiqc_custom_biotype/script.py create mode 100644 src/multiqc_custom_biotype/test.sh create mode 100644 src/picard_markduplicates/config.vsh.yaml create mode 100755 src/picard_markduplicates/script.sh create mode 100644 src/picard_markduplicates/test.sh create mode 100644 src/prepare_multiqc_input/config.vsh.yaml create mode 100644 src/prepare_multiqc_input/script.sh create mode 100644 src/preprocess_transcripts_fasta/config.vsh.yaml create mode 100644 src/preprocess_transcripts_fasta/script.sh create mode 100644 src/preprocess_transcripts_fasta/test.sh create mode 100644 src/preseq_lcextrap/config.vsh.yaml create mode 100644 src/preseq_lcextrap/script.sh create mode 100644 src/preseq_lcextrap/test.sh create mode 100644 src/rsem_merge_counts/config.vsh.yaml create mode 100644 src/rsem_merge_counts/script.sh create mode 100644 src/rseqc/rseqc_junctionannotation/config.vsh.yaml create mode 100644 src/rseqc/rseqc_junctionannotation/script.sh create mode 100644 src/rseqc/rseqc_junctionannotation/test.sh create mode 100644 src/rseqc/rseqc_junctionsaturation/config.vsh.yaml create mode 100644 src/rseqc/rseqc_junctionsaturation/script.sh create mode 100644 src/rseqc/rseqc_junctionsaturation/test.sh create mode 100644 src/rseqc/rseqc_readdistribution/config.vsh.yaml create mode 100644 src/rseqc/rseqc_readdistribution/script.sh create mode 100644 src/rseqc/rseqc_readdistribution/test.sh create mode 100644 src/rseqc/rseqc_readduplication/config.vsh.yaml create mode 100644 src/rseqc/rseqc_readduplication/script.sh create mode 100644 src/rseqc/rseqc_readduplication/test.sh create mode 100644 src/rseqc/tin/config.vsh.yaml create mode 100644 src/rseqc/tin/script.sh create mode 100644 src/rseqc/tin/test.sh create mode 100644 src/sortmerna/config.vsh.yaml create mode 100755 src/sortmerna/script.sh create mode 100644 src/sortmerna/test.sh create mode 100644 src/stringtie/config.vsh.yaml create mode 100644 src/stringtie/script.sh create mode 100644 src/stringtie/test.sh create mode 100644 src/summarizedexperiment/config.vsh.yaml create mode 100755 src/summarizedexperiment/script.sh create mode 100755 src/summarizedexperiment/summarizedexperiment.r create mode 100644 src/tx2gene/config.vsh.yaml create mode 100755 src/tx2gene/script.sh create mode 100644 src/tx2gene/test.sh create mode 100755 src/tx2gene/tx2gene.py create mode 100644 src/tximport/config.vsh.yaml create mode 100755 src/tximport/script.sh create mode 100644 src/tximport/test.sh create mode 100755 src/tximport/tximport.r create mode 100644 src/ucsc/bedclip/config.vsh.yaml create mode 100644 src/ucsc/bedclip/script.sh create mode 100644 src/ucsc/bedclip/test.sh create mode 100644 src/ucsc/bedgraphtobigwig/config.vsh.yaml create mode 100644 src/ucsc/bedgraphtobigwig/script.sh create mode 100644 src/ucsc/bedgraphtobigwig/test.sh create mode 100644 src/workflows/genome_alignment_and_quant/config.vsh.yaml create mode 100644 src/workflows/genome_alignment_and_quant/main.nf create mode 100755 src/workflows/genome_alignment_and_quant/test_run.sh create mode 100644 src/workflows/merge_quant_results/config.vsh.yaml create mode 100644 src/workflows/merge_quant_results/main.nf create mode 100644 src/workflows/merge_quant_results/test_run.sh create mode 100644 src/workflows/post_processing/config.vsh.yaml create mode 100644 src/workflows/post_processing/main.nf create mode 100755 src/workflows/post_processing/test_run.sh create mode 100644 src/workflows/pre_processing/config.vsh.yaml create mode 100644 src/workflows/pre_processing/main.nf create mode 100755 src/workflows/pre_processing/test_run.sh create mode 100644 src/workflows/prepare_genome/config.vsh.yaml create mode 100644 src/workflows/prepare_genome/main.nf create mode 100755 src/workflows/prepare_genome/test_run.sh create mode 100644 src/workflows/pseudo_alignment_and_quant/config.vsh.yaml create mode 100644 src/workflows/pseudo_alignment_and_quant/main.nf create mode 100755 src/workflows/pseudo_alignment_and_quant/test_run.sh create mode 100644 src/workflows/quality_control/config.vsh.yaml create mode 100644 src/workflows/quality_control/main.nf create mode 100644 src/workflows/quality_control/test_run.sh create mode 100644 src/workflows/rnaseq/config.vsh.yaml create mode 100644 src/workflows/rnaseq/main.nf create mode 100644 src/workflows/rnaseq/nextflow.config create mode 100644 src/workflows/rnaseq/test.nf create mode 100755 src/workflows/rnaseq/test_run.sh create mode 100644 src/workflows/utils/labels.config create mode 100644 target/.build.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow.config create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json create mode 100644 target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/prepare-for-rsem.py create mode 100644 target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/.config.vsh.yaml create mode 100644 target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf create mode 100644 target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow.config create mode 100644 target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow_schema.json create mode 100644 target/executable/bedtools_genomecov/.config.vsh.yaml create mode 100755 target/executable/bedtools_genomecov/bedtools_genomecov create mode 100644 target/executable/bedtools_genomecov/nextflow_labels.config create mode 100644 target/executable/cat_additional_fasta/.config.vsh.yaml create mode 100755 target/executable/cat_additional_fasta/cat_additional_fasta create mode 100644 target/executable/cat_additional_fasta/nextflow_labels.config create mode 100644 target/executable/cat_fastq/.config.vsh.yaml create mode 100755 target/executable/cat_fastq/cat_fastq create mode 100644 target/executable/cat_fastq/nextflow_labels.config create mode 100644 target/executable/copy_if_exists/.config.vsh.yaml create mode 100755 target/executable/copy_if_exists/copy_if_exists create mode 100644 target/executable/copy_if_exists/nextflow_labels.config create mode 100644 target/executable/copy_if_exists/optional_file.txt create mode 100644 target/executable/copy_if_exists/required_file.txt create mode 100644 target/executable/deseq2_qc/.config.vsh.yaml create mode 100644 target/executable/deseq2_qc/deseq2_clustering_header.txt create mode 100644 target/executable/deseq2_qc/deseq2_pca_header.txt create mode 100755 target/executable/deseq2_qc/deseq2_qc create mode 100644 target/executable/deseq2_qc/nextflow_labels.config create mode 100644 target/executable/dupradar/.config.vsh.yaml create mode 100755 target/executable/dupradar/dupradar create mode 100644 target/executable/dupradar/dupradar.r create mode 100644 target/executable/dupradar/nextflow_labels.config create mode 100644 target/executable/getchromsizes/.config.vsh.yaml create mode 100755 target/executable/getchromsizes/getchromsizes create mode 100644 target/executable/getchromsizes/nextflow_labels.config create mode 100644 target/executable/gtf2bed/.config.vsh.yaml create mode 100755 target/executable/gtf2bed/gtf2bed create mode 100755 target/executable/gtf2bed/gtf2bed.pl create mode 100644 target/executable/gtf2bed/nextflow_labels.config create mode 100644 target/executable/gtf_filter/.config.vsh.yaml create mode 100755 target/executable/gtf_filter/gtf_filter create mode 100644 target/executable/gtf_filter/nextflow_labels.config create mode 100644 target/executable/gunzip/.config.vsh.yaml create mode 100755 target/executable/gunzip/gunzip create mode 100644 target/executable/gunzip/nextflow_labels.config create mode 100644 target/executable/multiqc_custom_biotype/.config.vsh.yaml create mode 100644 target/executable/multiqc_custom_biotype/biotypes_header.txt create mode 100755 target/executable/multiqc_custom_biotype/multiqc_custom_biotype create mode 100644 target/executable/multiqc_custom_biotype/nextflow_labels.config create mode 100644 target/executable/picard_markduplicates/.config.vsh.yaml create mode 100644 target/executable/picard_markduplicates/nextflow_labels.config create mode 100755 target/executable/picard_markduplicates/picard_markduplicates create mode 100644 target/executable/prepare_multiqc_input/.config.vsh.yaml create mode 100644 target/executable/prepare_multiqc_input/multiqc_config.yml create mode 100644 target/executable/prepare_multiqc_input/nextflow_labels.config create mode 100755 target/executable/prepare_multiqc_input/prepare_multiqc_input create mode 100644 target/executable/preprocess_transcripts_fasta/.config.vsh.yaml create mode 100644 target/executable/preprocess_transcripts_fasta/nextflow_labels.config create mode 100755 target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta create mode 100644 target/executable/preseq_lcextrap/.config.vsh.yaml create mode 100644 target/executable/preseq_lcextrap/nextflow_labels.config create mode 100755 target/executable/preseq_lcextrap/preseq_lcextrap create mode 100644 target/executable/rsem_merge_counts/.config.vsh.yaml create mode 100644 target/executable/rsem_merge_counts/nextflow_labels.config create mode 100755 target/executable/rsem_merge_counts/rsem_merge_counts create mode 100644 target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml create mode 100644 target/executable/rseqc/rseqc_junctionannotation/nextflow_labels.config create mode 100755 target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation create mode 100644 target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml create mode 100644 target/executable/rseqc/rseqc_junctionsaturation/nextflow_labels.config create mode 100755 target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation create mode 100644 target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml create mode 100644 target/executable/rseqc/rseqc_readdistribution/nextflow_labels.config create mode 100755 target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution create mode 100644 target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml create mode 100644 target/executable/rseqc/rseqc_readduplication/nextflow_labels.config create mode 100755 target/executable/rseqc/rseqc_readduplication/rseqc_readduplication create mode 100644 target/executable/rseqc/rseqc_tin/.config.vsh.yaml create mode 100644 target/executable/rseqc/rseqc_tin/nextflow_labels.config create mode 100755 target/executable/rseqc/rseqc_tin/rseqc_tin create mode 100644 target/executable/sortmerna/.config.vsh.yaml create mode 100644 target/executable/sortmerna/nextflow_labels.config create mode 100755 target/executable/sortmerna/sortmerna create mode 100644 target/executable/stringtie/.config.vsh.yaml create mode 100644 target/executable/stringtie/nextflow_labels.config create mode 100755 target/executable/stringtie/stringtie create mode 100644 target/executable/summarizedexperiment/.config.vsh.yaml create mode 100644 target/executable/summarizedexperiment/nextflow_labels.config create mode 100755 target/executable/summarizedexperiment/summarizedexperiment create mode 100755 target/executable/summarizedexperiment/summarizedexperiment.r create mode 100644 target/executable/tx2gene/.config.vsh.yaml create mode 100644 target/executable/tx2gene/nextflow_labels.config create mode 100755 target/executable/tx2gene/tx2gene create mode 100755 target/executable/tx2gene/tx2gene.py create mode 100644 target/executable/tximport/.config.vsh.yaml create mode 100644 target/executable/tximport/nextflow_labels.config create mode 100755 target/executable/tximport/tximport create mode 100755 target/executable/tximport/tximport.r create mode 100644 target/executable/ucsc/bedclip/.config.vsh.yaml create mode 100755 target/executable/ucsc/bedclip/bedclip create mode 100644 target/executable/ucsc/bedclip/nextflow_labels.config create mode 100644 target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml create mode 100755 target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig create mode 100644 target/executable/ucsc/bedgraphtobigwig/nextflow_labels.config create mode 100644 target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml create mode 100755 target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant create mode 100644 target/executable/workflows/genome_alignment_and_quant/nextflow_labels.config create mode 100644 target/executable/workflows/merge_quant_results/.config.vsh.yaml create mode 100755 target/executable/workflows/merge_quant_results/merge_quant_results create mode 100644 target/executable/workflows/merge_quant_results/nextflow_labels.config create mode 100644 target/executable/workflows/post_processing/.config.vsh.yaml create mode 100644 target/executable/workflows/post_processing/nextflow_labels.config create mode 100755 target/executable/workflows/post_processing/post_processing create mode 100644 target/executable/workflows/pre_processing/.config.vsh.yaml create mode 100644 target/executable/workflows/pre_processing/nextflow_labels.config create mode 100755 target/executable/workflows/pre_processing/pre_processing create mode 100644 target/executable/workflows/prepare_genome/.config.vsh.yaml create mode 100644 target/executable/workflows/prepare_genome/nextflow_labels.config create mode 100755 target/executable/workflows/prepare_genome/prepare_genome create mode 100644 target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml create mode 100644 target/executable/workflows/pseudo_alignment_and_quant/nextflow_labels.config create mode 100755 target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant create mode 100644 target/executable/workflows/quality_control/.config.vsh.yaml create mode 100644 target/executable/workflows/quality_control/nextflow_labels.config create mode 100755 target/executable/workflows/quality_control/quality_control create mode 100644 target/executable/workflows/rnaseq/.config.vsh.yaml create mode 100644 target/executable/workflows/rnaseq/nextflow_labels.config create mode 100755 target/executable/workflows/rnaseq/rnaseq create mode 100644 target/nextflow/bedtools_genomecov/.config.vsh.yaml create mode 100644 target/nextflow/bedtools_genomecov/main.nf create mode 100644 target/nextflow/bedtools_genomecov/nextflow.config create mode 100644 target/nextflow/bedtools_genomecov/nextflow_labels.config create mode 100644 target/nextflow/bedtools_genomecov/nextflow_schema.json create mode 100644 target/nextflow/cat_additional_fasta/.config.vsh.yaml create mode 100644 target/nextflow/cat_additional_fasta/main.nf create mode 100644 target/nextflow/cat_additional_fasta/nextflow.config create mode 100644 target/nextflow/cat_additional_fasta/nextflow_labels.config create mode 100644 target/nextflow/cat_additional_fasta/nextflow_schema.json create mode 100644 target/nextflow/cat_fastq/.config.vsh.yaml create mode 100644 target/nextflow/cat_fastq/main.nf create mode 100644 target/nextflow/cat_fastq/nextflow.config create mode 100644 target/nextflow/cat_fastq/nextflow_labels.config create mode 100644 target/nextflow/cat_fastq/nextflow_schema.json create mode 100644 target/nextflow/copy_if_exists/.config.vsh.yaml create mode 100644 target/nextflow/copy_if_exists/main.nf create mode 100644 target/nextflow/copy_if_exists/nextflow.config create mode 100644 target/nextflow/copy_if_exists/nextflow_labels.config create mode 100644 target/nextflow/copy_if_exists/nextflow_schema.json create mode 100644 target/nextflow/copy_if_exists/optional_file.txt create mode 100644 target/nextflow/copy_if_exists/required_file.txt create mode 100644 target/nextflow/deseq2_qc/.config.vsh.yaml create mode 100644 target/nextflow/deseq2_qc/deseq2_clustering_header.txt create mode 100644 target/nextflow/deseq2_qc/deseq2_pca_header.txt create mode 100644 target/nextflow/deseq2_qc/main.nf create mode 100644 target/nextflow/deseq2_qc/nextflow.config create mode 100644 target/nextflow/deseq2_qc/nextflow_labels.config create mode 100644 target/nextflow/deseq2_qc/nextflow_schema.json create mode 100644 target/nextflow/dupradar/.config.vsh.yaml create mode 100644 target/nextflow/dupradar/dupradar.r create mode 100644 target/nextflow/dupradar/main.nf create mode 100644 target/nextflow/dupradar/nextflow.config create mode 100644 target/nextflow/dupradar/nextflow_labels.config create mode 100644 target/nextflow/dupradar/nextflow_schema.json create mode 100644 target/nextflow/getchromsizes/.config.vsh.yaml create mode 100644 target/nextflow/getchromsizes/main.nf create mode 100644 target/nextflow/getchromsizes/nextflow.config create mode 100644 target/nextflow/getchromsizes/nextflow_labels.config create mode 100644 target/nextflow/getchromsizes/nextflow_schema.json create mode 100644 target/nextflow/gtf2bed/.config.vsh.yaml create mode 100755 target/nextflow/gtf2bed/gtf2bed.pl create mode 100644 target/nextflow/gtf2bed/main.nf create mode 100644 target/nextflow/gtf2bed/nextflow.config create mode 100644 target/nextflow/gtf2bed/nextflow_labels.config create mode 100644 target/nextflow/gtf2bed/nextflow_schema.json create mode 100644 target/nextflow/gtf_filter/.config.vsh.yaml create mode 100644 target/nextflow/gtf_filter/main.nf create mode 100644 target/nextflow/gtf_filter/nextflow.config create mode 100644 target/nextflow/gtf_filter/nextflow_labels.config create mode 100644 target/nextflow/gtf_filter/nextflow_schema.json create mode 100644 target/nextflow/gunzip/.config.vsh.yaml create mode 100644 target/nextflow/gunzip/main.nf create mode 100644 target/nextflow/gunzip/nextflow.config create mode 100644 target/nextflow/gunzip/nextflow_labels.config create mode 100644 target/nextflow/gunzip/nextflow_schema.json create mode 100644 target/nextflow/multiqc_custom_biotype/.config.vsh.yaml create mode 100644 target/nextflow/multiqc_custom_biotype/biotypes_header.txt create mode 100644 target/nextflow/multiqc_custom_biotype/main.nf create mode 100644 target/nextflow/multiqc_custom_biotype/nextflow.config create mode 100644 target/nextflow/multiqc_custom_biotype/nextflow_labels.config create mode 100644 target/nextflow/multiqc_custom_biotype/nextflow_schema.json create mode 100644 target/nextflow/picard_markduplicates/.config.vsh.yaml create mode 100644 target/nextflow/picard_markduplicates/main.nf create mode 100644 target/nextflow/picard_markduplicates/nextflow.config create mode 100644 target/nextflow/picard_markduplicates/nextflow_labels.config create mode 100644 target/nextflow/picard_markduplicates/nextflow_schema.json create mode 100644 target/nextflow/prepare_multiqc_input/.config.vsh.yaml create mode 100644 target/nextflow/prepare_multiqc_input/main.nf create mode 100644 target/nextflow/prepare_multiqc_input/multiqc_config.yml create mode 100644 target/nextflow/prepare_multiqc_input/nextflow.config create mode 100644 target/nextflow/prepare_multiqc_input/nextflow_labels.config create mode 100644 target/nextflow/prepare_multiqc_input/nextflow_schema.json create mode 100644 target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml create mode 100644 target/nextflow/preprocess_transcripts_fasta/main.nf create mode 100644 target/nextflow/preprocess_transcripts_fasta/nextflow.config create mode 100644 target/nextflow/preprocess_transcripts_fasta/nextflow_labels.config create mode 100644 target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json create mode 100644 target/nextflow/preseq_lcextrap/.config.vsh.yaml create mode 100644 target/nextflow/preseq_lcextrap/main.nf create mode 100644 target/nextflow/preseq_lcextrap/nextflow.config create mode 100644 target/nextflow/preseq_lcextrap/nextflow_labels.config create mode 100644 target/nextflow/preseq_lcextrap/nextflow_schema.json create mode 100644 target/nextflow/rsem_merge_counts/.config.vsh.yaml create mode 100644 target/nextflow/rsem_merge_counts/main.nf create mode 100644 target/nextflow/rsem_merge_counts/nextflow.config create mode 100644 target/nextflow/rsem_merge_counts/nextflow_labels.config create mode 100644 target/nextflow/rsem_merge_counts/nextflow_schema.json create mode 100644 target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml create mode 100644 target/nextflow/rseqc/rseqc_junctionannotation/main.nf create mode 100644 target/nextflow/rseqc/rseqc_junctionannotation/nextflow.config create mode 100644 target/nextflow/rseqc/rseqc_junctionannotation/nextflow_labels.config create mode 100644 target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json create mode 100644 target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml create mode 100644 target/nextflow/rseqc/rseqc_junctionsaturation/main.nf create mode 100644 target/nextflow/rseqc/rseqc_junctionsaturation/nextflow.config create mode 100644 target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_labels.config create mode 100644 target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json create mode 100644 target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml create mode 100644 target/nextflow/rseqc/rseqc_readdistribution/main.nf create mode 100644 target/nextflow/rseqc/rseqc_readdistribution/nextflow.config create mode 100644 target/nextflow/rseqc/rseqc_readdistribution/nextflow_labels.config create mode 100644 target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json create mode 100644 target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml create mode 100644 target/nextflow/rseqc/rseqc_readduplication/main.nf create mode 100644 target/nextflow/rseqc/rseqc_readduplication/nextflow.config create mode 100644 target/nextflow/rseqc/rseqc_readduplication/nextflow_labels.config create mode 100644 target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json create mode 100644 target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml create mode 100644 target/nextflow/rseqc/rseqc_tin/main.nf create mode 100644 target/nextflow/rseqc/rseqc_tin/nextflow.config create mode 100644 target/nextflow/rseqc/rseqc_tin/nextflow_labels.config create mode 100644 target/nextflow/rseqc/rseqc_tin/nextflow_schema.json create mode 100644 target/nextflow/sortmerna/.config.vsh.yaml create mode 100644 target/nextflow/sortmerna/main.nf create mode 100644 target/nextflow/sortmerna/nextflow.config create mode 100644 target/nextflow/sortmerna/nextflow_labels.config create mode 100644 target/nextflow/sortmerna/nextflow_schema.json create mode 100644 target/nextflow/stringtie/.config.vsh.yaml create mode 100644 target/nextflow/stringtie/main.nf create mode 100644 target/nextflow/stringtie/nextflow.config create mode 100644 target/nextflow/stringtie/nextflow_labels.config create mode 100644 target/nextflow/stringtie/nextflow_schema.json create mode 100644 target/nextflow/summarizedexperiment/.config.vsh.yaml create mode 100644 target/nextflow/summarizedexperiment/main.nf create mode 100644 target/nextflow/summarizedexperiment/nextflow.config create mode 100644 target/nextflow/summarizedexperiment/nextflow_labels.config create mode 100644 target/nextflow/summarizedexperiment/nextflow_schema.json create mode 100755 target/nextflow/summarizedexperiment/summarizedexperiment.r create mode 100644 target/nextflow/tx2gene/.config.vsh.yaml create mode 100644 target/nextflow/tx2gene/main.nf create mode 100644 target/nextflow/tx2gene/nextflow.config create mode 100644 target/nextflow/tx2gene/nextflow_labels.config create mode 100644 target/nextflow/tx2gene/nextflow_schema.json create mode 100755 target/nextflow/tx2gene/tx2gene.py create mode 100644 target/nextflow/tximport/.config.vsh.yaml create mode 100644 target/nextflow/tximport/main.nf create mode 100644 target/nextflow/tximport/nextflow.config create mode 100644 target/nextflow/tximport/nextflow_labels.config create mode 100644 target/nextflow/tximport/nextflow_schema.json create mode 100755 target/nextflow/tximport/tximport.r create mode 100644 target/nextflow/ucsc/bedclip/.config.vsh.yaml create mode 100644 target/nextflow/ucsc/bedclip/main.nf create mode 100644 target/nextflow/ucsc/bedclip/nextflow.config create mode 100644 target/nextflow/ucsc/bedclip/nextflow_labels.config create mode 100644 target/nextflow/ucsc/bedclip/nextflow_schema.json create mode 100644 target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml create mode 100644 target/nextflow/ucsc/bedgraphtobigwig/main.nf create mode 100644 target/nextflow/ucsc/bedgraphtobigwig/nextflow.config create mode 100644 target/nextflow/ucsc/bedgraphtobigwig/nextflow_labels.config create mode 100644 target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json create mode 100644 target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml create mode 100644 target/nextflow/workflows/genome_alignment_and_quant/main.nf create mode 100644 target/nextflow/workflows/genome_alignment_and_quant/nextflow.config create mode 100644 target/nextflow/workflows/genome_alignment_and_quant/nextflow_labels.config create mode 100644 target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json create mode 100644 target/nextflow/workflows/merge_quant_results/.config.vsh.yaml create mode 100644 target/nextflow/workflows/merge_quant_results/main.nf create mode 100644 target/nextflow/workflows/merge_quant_results/nextflow.config create mode 100644 target/nextflow/workflows/merge_quant_results/nextflow_labels.config create mode 100644 target/nextflow/workflows/merge_quant_results/nextflow_schema.json create mode 100644 target/nextflow/workflows/post_processing/.config.vsh.yaml create mode 100644 target/nextflow/workflows/post_processing/main.nf create mode 100644 target/nextflow/workflows/post_processing/nextflow.config create mode 100644 target/nextflow/workflows/post_processing/nextflow_labels.config create mode 100644 target/nextflow/workflows/post_processing/nextflow_schema.json create mode 100644 target/nextflow/workflows/pre_processing/.config.vsh.yaml create mode 100644 target/nextflow/workflows/pre_processing/main.nf create mode 100644 target/nextflow/workflows/pre_processing/nextflow.config create mode 100644 target/nextflow/workflows/pre_processing/nextflow_labels.config create mode 100644 target/nextflow/workflows/pre_processing/nextflow_schema.json create mode 100644 target/nextflow/workflows/prepare_genome/.config.vsh.yaml create mode 100644 target/nextflow/workflows/prepare_genome/main.nf create mode 100644 target/nextflow/workflows/prepare_genome/nextflow.config create mode 100644 target/nextflow/workflows/prepare_genome/nextflow_labels.config create mode 100644 target/nextflow/workflows/prepare_genome/nextflow_schema.json create mode 100644 target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml create mode 100644 target/nextflow/workflows/pseudo_alignment_and_quant/main.nf create mode 100644 target/nextflow/workflows/pseudo_alignment_and_quant/nextflow.config create mode 100644 target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_labels.config create mode 100644 target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json create mode 100644 target/nextflow/workflows/quality_control/.config.vsh.yaml create mode 100644 target/nextflow/workflows/quality_control/main.nf create mode 100644 target/nextflow/workflows/quality_control/nextflow.config create mode 100644 target/nextflow/workflows/quality_control/nextflow_labels.config create mode 100644 target/nextflow/workflows/quality_control/nextflow_schema.json create mode 100644 target/nextflow/workflows/rnaseq/.config.vsh.yaml create mode 100644 target/nextflow/workflows/rnaseq/main.nf create mode 100644 target/nextflow/workflows/rnaseq/nextflow.config create mode 100644 target/nextflow/workflows/rnaseq/nextflow_labels.config create mode 100644 target/nextflow/workflows/rnaseq/nextflow_schema.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a4527e3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.nextflow* +work +testData +test_results +target \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d68b1de --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,20 @@ +# rnaseq v0.3.0 + +## Minor changes + +* Pin biobox version to v0.3.1 (PR #44). + +* Bump viash version to 0.9.4 (PR #45). + +## Bug fixes + +* Fix `summarizedexperiment` build PR (#42). + +* Fix an issue with the `deseq2_qc` component not being able to create the DESeq2 object (PR #41). + +## Known issues + +The following caveats are known and will be addressed in future releases: + +- [`bbmap_bbsplit` input file logic requires revision](https://github.com/viash-hub/rnaseq/issues/30) +- [Setting `--skip_deseq2_qc=false` results in an error](https://github.com/viash-hub/rnaseq/issues/31) diff --git a/README.md b/README.md new file mode 100644 index 0000000..5826ec5 --- /dev/null +++ b/README.md @@ -0,0 +1,242 @@ +# RNAseq.vsh + + + + +## Introduction + +This package contains the +[nf-core/rnaseq](https://github.com/nf-core/rnaseq) pipeline (version +3.14.0) in the [Viash framework](http://www.viash.io). We stick to the +nf-core pipeline as much as possible. This also means that we create a +subworkflow for the main stages of the pipeline as depicted in the +[nf-core README](https://github.com/nf-core/rnaseq). + +### Modular design + +The workflow is built in a modular fashion, where most of the base +functionality is provided by components from +[biobox](https://www.viash-hub.com/packages/biobox/latest), supplemented +by custom base components and workflow components in this package. This +architecture ensures both flexibility and reproducibility while +leveraging established bioinformatics tools. + +### Standardized components + +Each of the workflow components is implemented as a stand-alone module +with a standardized interface. This design philosophy offers several +advantages: + +1. Isolation of Tools and Functionality: Each subworkflow as well as + its individual components can be executed independently as + stand-alone entities when you need only specific functionality. + +2. Consistent Interfaces: All components follow standardized + input/output conventions, making it easy to connect or replace them. + +3. Reusability: Each component or workflow can be integrated seamlessly + as a dependency in another workflow. + +### Workflow Structure + +The end-to-end [rnaseq +workflow](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/rnaseq) +has 6 sub-workflows that can also be run independently. + +1. [Prepare + genome](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/prepare_genome): + Preparation of all the reference data required for downstream + analysis, i.e., uncompress provided reference data or generate the + required index files (for STAR, Salmon, Kallisto, RSEM, BBSplit). + +2. [Pre-processing](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/pre_processing): + Quality control on the input reads, performing FastQC, extracts + UMIs, trims adapters, and removal of ribosomal RNA reads. Adapters + can be trimmed using either Trim galore! or fastp (work in + progress). + +3. [Genome alignment and + quantification](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/genome_alignment_and_quant): + Genome alignment using STAR and transcript quantification using + Salmon or RSEM (using RSEM’s built-in support for STAR) (work in + progress). Alignment sorting and indexing, as well as computation of + statistics from the BAM files is performed using Samtools. UMI-based + deduplication is also performed. + +4. [Post-processing](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/post_processing): + Marking of duplicate reads (picard MarkDuplicates), transcript + assembly and quantification (StringTie), and creation of bigWig + coverage files. + +5. [Pseudo alignment and + quantification](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/pseudo_alignment_and_quant): + Pseudo alignment and transcript quantification using Salmon or + Kallisto. + +6. [Final + QC](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/quality_control): + A quality control workflow performing RSeQC, dupRadar, Qualimap, + Preseq, DESeq2 and featureCounts. It presents QC for raw reads, + alignments, gene biotype, sample similarity, and strand specificity + (MultiQC). + +## Example usage + +The rnaseq package is available via [Viash +Hub](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components), where +you can receive instructions on how to run the end-to-end workflow as +well as individual subworkflows or components. + +### Download test data + +As test data, we can use the small dataset nf-core provided with [their +`test` +profile](https://github.com/nf-core/test-datasets/blob/rnaseq3/samplesheet/v3.10/samplesheet_test.csv): +. + +A simple script has been provided to fetch those files from the github +repository and store them under `testData/minimal_test` (the +subdirectory is created to support `full_test` later as well): +`bin/get_minimal_test_data.sh`. + +Additionally, a script has been provided to fetch some additional +resources for unit testing the components. Thes will be stored under +`testData/unit_test_resources`: `bin/get_unit test_data.sh` + +The test data can be downloaded by running the following commands: + +``` bash +bin/minimal_test.sh +bin/get_minimal_test_data.sh +``` + +### Run the workflow + +To run the end-to-end workflow, browse to the +[rnaseq](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/rnaseq) +workflow on Viash Hub. Here you can find an overview on the formats of +the input and output files, as well as a detailed list of required and +optional parameters to run the workflow. + +The workflow can be run via the CLI with Nextflow or on Seqera Cloud. + +#### Run using Nextflow + +After having +[`nextflow`](https://www.nextflow.io/docs/latest/getstarted.html) +installed, we can now follow the instructions on screen by clicking +`launch`. + +![](assets/launch_workflow.png) + +1. The first step is to select the execution environment, which is + Nextflow in this example. + +![](assets/nextflow_execution.png) + +2. We can now fill in the parameters for the workflow. In this example, + use the locations of the test data that were downloaded earlier. We + select the `advanced form` option, to be able to process multiple + samples in parallel. + +![](assets/advanced_form.png) + +We fill out the global parameters first - those are the parameters that +apply to all samples. + +![](assets/global_params.png) + +Then, we fill in our parameter sets - this is one parameter set for each +samples. Note that each sample can consist of multiple fastq files. + +![](assets/parameter_set_1.png) ![](assets/parameter_set_2.png) + +3. Once we hit launch, we can execute the workflow by following the + instructions on the screen: + +``` bash +cat > params.yaml <<'EOM' +param_list: + - id: "WT_REP1" + fastq_1: [ "SRR6357070_1.fastq.gz", "SRR6357071_1.fastq.gz" ] + fastq_2: [ "SRR6357070_2.fastq.gz", "SRR6357071_2.fastq.gz" ] + strandedness: "reverse" + - id: "WT_REP2" + fastq_1: [ "SRR6357072_1.fastq.gz" ] + fastq_2: [ "SRR6357072_2.fastq.gz" ] + strandedness: "reverse" +fasta: "testData/minimal_test/reference/genome.fasta" +publish_dir: "full_pipeline_test/" +gtf: "testData/minimal_test/reference/genes.gtf.gz" +transcript_fasta: "testData/minimal_test/reference/transcriptome.fasta" +EOM + +nextflow run https://packages.viash-hub.com/vsh/rnaseq.git \ + -revision v0.2.0 \ + -main-script target/nextflow/workflows/rnaseq/main.nf \ + -params-file params.yaml \ + -latest \ + -resume +``` + +#### Run using Seqera Cloud + +It’s also possible to run the workflow directly on [Seqera +Cloud](https://cloud.seqera.io/). The required [Nextflow schema +files](https://nextflow-io.github.io/nf-schema/latest/nextflow_schema/nextflow_schema_specification/) +are provided with the workflows. Since Seqera Cloud does not support +multiple-value parameters when using the form-based input, we will use +Viash Hub to launch the +[workflow](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/rnaseq). + +1. First, we need to create an API token for your Seqera Cloud account. +2. Next, we can launch the workflow by selecting `Seqera Cloud` as + execution environment. Here you can add your API key, as well as the + Workspace ID and Compute Environment. + +![](assets/seqera_cloud_execution.png) + +3. We can now fill in the parameters, as described under + [`Run using Nextflow`](#run-using-nextflow). Note that a direct link + to the test data needs to be provided for Seqera Cloud execution, + e.g. to test data in a GitHub repository or data on a cloud storage + service. +4. By launching the workflow via Viash Hub, it will be executed on + Seqera Cloud in your workspace environment of choice. + +## (Optional) Resource Usage Tuning + +Nextflow’s labels can be used to specify the amount of resources a +process can use. This workflow uses the following labels for CPU and +memory: + +- `lowmem`, `midmem`, `highmem`, `veryhighmem` +- `singlecpu`, `lowcpu`, `midcpu`, `highcpu`, `veryhighcpu` + +The defaults for these labels can be found at +`src/workflows/utils/labels.config`. Nextflow checks that the specified +resources for a process do not exceed what is available on the machine +and will not start if it does. Create your own config file to tune the +labels to your needs, for example: + +``` yaml +// Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +``` + +When starting nextflow using the CLI, you can use the `-c` flag to +provide the file to NextFlow and overwrite the defaults. + +### Contributions + +This workflow was developed by Data Intuitive. Other contributions are +welcome. diff --git a/README.qmd b/README.qmd new file mode 100644 index 0000000..7785c03 --- /dev/null +++ b/README.qmd @@ -0,0 +1,158 @@ +--- +title: RNAseq.vsh +format: gfm +--- + + + +```{r, echo = FALSE, message = FALSE, error = FALSE, warning = FALSE} +library(tidyverse) +``` + +## Introduction + +This package contains the [nf-core/rnaseq](https://github.com/nf-core/rnaseq) pipeline (version 3.14.0) in the [Viash framework](http://www.viash.io). We stick to the nf-core pipeline as much as possible. This also means that we create a subworkflow for the main stages of the pipeline as depicted in the [nf-core README](https://github.com/nf-core/rnaseq). + +### Modular design +The workflow is built in a modular fashion, where most of the base functionality is provided by components from [biobox](https://www.viash-hub.com/packages/biobox/latest), supplemented by custom base components and workflow components in this package. This architecture ensures both flexibility and reproducibility while leveraging established bioinformatics tools. + +### Standardized components +Each of the workflow components is implemented as a stand-alone module with a standardized interface. This design philosophy offers several advantages: + +1. Isolation of Tools and Functionality: Each subworkflow as well as its individual components can be executed independently as stand-alone entities when you need only specific functionality. + +2. Consistent Interfaces: All components follow standardized input/output conventions, making it easy to connect or replace them. + +3. Reusability: Each component or workflow can be integrated seamlessly as a dependency in another workflow. + +### Workflow Structure + +The end-to-end [rnaseq workflow](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/rnaseq) has 6 sub-workflows that can also be run independently. + +1. [Prepare genome](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/prepare_genome): Preparation of all the reference data required for downstream analysis, i.e., uncompress provided reference data or generate the required index files (for STAR, Salmon, Kallisto, RSEM, BBSplit). + +2. [Pre-processing](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/pre_processing): Quality control on the input reads, performing FastQC, extracts UMIs, trims adapters, and removal of ribosomal RNA reads. Adapters can be trimmed using either Trim galore! or fastp (work in progress). + +3. [Genome alignment and quantification](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/genome_alignment_and_quant): Genome alignment using STAR and transcript quantification using Salmon or RSEM (using RSEM's built-in support for STAR) (work in progress). Alignment sorting and indexing, as well as computation of statistics from the BAM files is performed using Samtools. UMI-based deduplication is also performed. + +4. [Post-processing](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/post_processing): Marking of duplicate reads (picard MarkDuplicates), transcript assembly and quantification (StringTie), and creation of bigWig coverage files. + +5. [Pseudo alignment and quantification](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/pseudo_alignment_and_quant): Pseudo alignment and transcript quantification using Salmon or Kallisto. + +6. [Final QC](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/quality_control): A quality control workflow performing RSeQC, dupRadar, Qualimap, Preseq, DESeq2 and featureCounts. It presents QC for raw reads, alignments, gene biotype, sample similarity, and strand specificity (MultiQC). + +## Example usage + +The rnaseq package is available via [Viash Hub](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components), where you can receive instructions on how to run the end-to-end workflow as well as individual subworkflows or components. + +### Download test data + +As test data, we can use the small dataset nf-core provided with [their `test` profile](https://github.com/nf-core/test-datasets/blob/rnaseq3/samplesheet/v3.10/samplesheet_test.csv): . + +A simple script has been provided to fetch those files from the github repository and store them under `testData/minimal_test` (the subdirectory is created to support `full_test` later as well): `bin/get_minimal_test_data.sh`. + +Additionally, a script has been provided to fetch some additional resources for unit testing the components. Thes will be stored under `testData/unit_test_resources`: `bin/get_unit test_data.sh` + +The test data can be downloaded by running the following commands: + +``` bash +bin/minimal_test.sh +bin/get_minimal_test_data.sh +``` + +### Run the workflow + +To run the end-to-end workflow, browse to the [rnaseq](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/rnaseq) workflow on Viash Hub. Here you can find an overview on the formats of the input and output files, as well as a detailed list of required and optional parameters to run the workflow. + +The workflow can be run via the CLI with Nextflow or on Seqera Cloud. + +#### Run using Nextflow + +After having [`nextflow`](https://www.nextflow.io/docs/latest/getstarted.html) installed, we can now follow the instructions on screen by clicking `launch`. + +![](assets/launch_workflow.png) + +1. The first step is to select the execution environment, which is Nextflow in this example. + +![](assets/nextflow_execution.png) + +2. We can now fill in the parameters for the workflow. In this example, use the locations of the test data that were downloaded earlier. We select the `advanced form` option, to be able to process multiple samples in parallel. + +![](assets/advanced_form.png) + +We fill out the global parameters first - those are the parameters that apply to all samples. + +![](assets/global_params.png) + +Then, we fill in our parameter sets - this is one parameter set for each samples. Note that each sample can consist of multiple fastq files. + +![](assets/parameter_set_1.png) +![](assets/parameter_set_2.png) + +3. Once we hit launch, we can execute the workflow by following the instructions on the screen: + +``` bash +cat > params.yaml <<'EOM' +param_list: + - id: "WT_REP1" + fastq_1: [ "SRR6357070_1.fastq.gz", "SRR6357071_1.fastq.gz" ] + fastq_2: [ "SRR6357070_2.fastq.gz", "SRR6357071_2.fastq.gz" ] + strandedness: "reverse" + - id: "WT_REP2" + fastq_1: [ "SRR6357072_1.fastq.gz" ] + fastq_2: [ "SRR6357072_2.fastq.gz" ] + strandedness: "reverse" +fasta: "testData/minimal_test/reference/genome.fasta" +publish_dir: "full_pipeline_test/" +gtf: "testData/minimal_test/reference/genes.gtf.gz" +transcript_fasta: "testData/minimal_test/reference/transcriptome.fasta" +EOM + +nextflow run https://packages.viash-hub.com/vsh/rnaseq.git \ + -revision v0.2.0 \ + -main-script target/nextflow/workflows/rnaseq/main.nf \ + -params-file params.yaml \ + -latest \ + -resume +``` + +#### Run using Seqera Cloud + +It's also possible to run the workflow directly on [Seqera Cloud](https://cloud.seqera.io/). The required [Nextflow schema files](https://nextflow-io.github.io/nf-schema/latest/nextflow_schema/nextflow_schema_specification/) are provided with the workflows. Since Seqera Cloud does not support multiple-value parameters when using the form-based input, we will use Viash Hub to launch the [workflow](https://www.viash-hub.com/packages/rnaseq/v0.2.0/components/workflows/rnaseq). + +1. First, we need to create an API token for your Seqera Cloud account. +2. Next, we can launch the workflow by selecting `Seqera Cloud` as execution environment. Here you can add your API key, as well as the Workspace ID and Compute Environment. + +![](assets/seqera_cloud_execution.png) + +3. We can now fill in the parameters, as described under [`Run using Nextflow`](#run-using-nextflow). Note that a direct link to the test data needs to be provided for Seqera Cloud execution, e.g. to test data in a GitHub repository or data on a cloud storage service. +4. By launching the workflow via Viash Hub, it will be executed on Seqera Cloud in your workspace environment of choice. + +## (Optional) Resource Usage Tuning + +Nextflow’s labels can be used to specify the amount of resources a process can use. This workflow uses the following labels for CPU and memory: + +* `lowmem`, `midmem`, `highmem`, `veryhighmem` +* `singlecpu`, `lowcpu`, `midcpu`, `highcpu`, `veryhighcpu` + +The defaults for these labels can be found at `src/workflows/utils/labels.config`. Nextflow checks that the specified resources for a process do not exceed what is available on the machine and will not start if it does. Create your own config file to tune the labels to your needs, for example: + +``` yaml +// Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +``` + +When starting nextflow using the CLI, you can use the `-c` flag to provide the file to NextFlow and overwrite the defaults. + +### Contributions + +This workflow was developed by Data Intuitive. Other contributions are welcome. diff --git a/_viash.yaml b/_viash.yaml new file mode 100644 index 0000000..82ff45e --- /dev/null +++ b/_viash.yaml @@ -0,0 +1,24 @@ +name: rnaseq +version: v0.3.0 +viash_version: 0.9.4 + +config_mods: | + .requirements.commands := ['ps'] + .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'} + .runners[.type == 'nextflow'].directives.tag := '$id' + .runners[.type == 'nextflow'].config.script := 'includeConfig("nextflow_labels.config")' + +repositories: + - name: biobox + type: vsh + repo: biobox + tag: v0.3.1 + - name: craftbox + type: vsh + repo: craftbox + tag: v0.1.0 + +info: + test_resources: + - path: gs://viash-hub-resources/rnaseq/v1 + dest: testData diff --git a/assets/advanced_form.png b/assets/advanced_form.png new file mode 100644 index 0000000000000000000000000000000000000000..811d3343ec36e104b56fb04b2e78cf0641cb9ed9 GIT binary patch literal 46759 zcmd?RWmH^Cv^7fOgy1B>T?4`02?-K3!M$;JcMXu>!KIPl5G1&}ySuwL?(#J`NzQ%W zJ@4Q9_r}=WW7E64WL3>tYt5R)7daU*lxKL)prD{oB*fo+f`Wp@gn|NmLxhKXqBXo) z1NlSKL_|bRLPUf_&eqD%#M}T1iYC$_qDk^C0Q<9`B*xoZ;qU1!>5261&++7uF`l(Y zdzugBn511e<6*F|VY5A}D1VQ_&WZg_*h?7BN95i3G)}r?&z?YG|99lLQ=95b+v?#> zIkq=uaJJXqiBRA)%Ir=VZdo~ga9NKQ=fVvTf061%)SgwFV0kHTg)pEl|4KP#?G0Gx za<*@5TD9g9>@Mj*ArGf`ra7Bq>WV>6w9@_IBdd$%be<&53Srb3iU14u$nxop6g?hx zBgWw;)(@YqK2C$KR&uy-+NsFTbAxdct<7b$4*0r?E;6$w@ zB74i`o{H@4-bIjzn+l*M7#rFwGb(Up#S$(gftwnjQ0ZT8x&H+|GI`Fn$5gn#sBx!%nrcA z)X30%`yWjqfjn|X`QL>>KB=Y>67e-tkP`oohENEJKo|da^QT~q#jQiXB1!!3hIm6B zEwleu$0(K)p-_;DJ~EU3N5>){kLuz7qhsE%8dzzhE$pwUDE_N0X{0_=|I12GUG+AD z{!%2*@;~Z&7cZotkxW5BA^!Gz& z1!87a)(6AQYjOP!zNf;|TCswr0+Yw16&!ejqBPFmrv04+?_!0%<6%)02@M*7l)?Vn zxO;k3)2n;(+OYtb+t1R+>0m(;b;rUfyW2ny23ZoHZ|Kf<;Y~3U_;G#8tb%kB9DH$4 z{e2DR1M5*DAz4UveIq48{x-|lR_WU>L!>=NTqX*3dvjP*eJLubOD5Xd_BjRdK+T>( ziC;(Do3rgZZLlWjv2$EGQ!z>D)u<`(>-{ZS_emDr`b6APM1$Lf7MBuI)v)m4Vn#tf zh+p>MPVf=m4wg#Wp=RFZ(kb(O-{!aEWP0kb)Ci}g1ggi&sT@jem@sTH--+vk%SCi& zBEHKrUuI zFr^^kg)l3E8xppL-gmH)x>9{*DmhcO@_mmS=WioAlDU#Hm#*iQr(AsaS^3^|G{eYg zjQifSGiNlgrp<3(H;~a9)(}mtQ zRSWvx3=ItWf*)N~7rnM%tf(8^hAdg!^?_?*)wKbmJ7eL3aTtE0eQV+Oq{g4T98g_- z`}|g{?Gh-XA|WsoYDXd|A=4#$XP08w9%C5rd%cqbAUgA5t+vkxlWP!P@>e6iBB1AO zm-LPUXa)H|g~MsPsKL-Pj0y!)MnYyn-(GQ}N$J%T@?&I9s@I23SpsTy)6Ar)o~WGu zpLE5IZx3t03?^fh6>0L$XWPsPH=ABrB3-E2Yqbu290`2Kcu^O9R)R6v1Bu;%V*;`8 zDERTyC}l>2k+)!v+-Vn&^A1j`p`wLaoL4vUIWkEm2#;VNVjO;L=VR`j*@lA5Op3k) zRwMm~`p+lB9!pK#174#WSnG@6g*O&+70l$<-9`(E4ku&853M0TL|U#T4J9RQwhL$noc*8l@^mstL z!Rs{tx=^j^8}Z|EP5=teUA)Y&Py^C%OV=mL;oC?AFPa?LRF19d2YfbX@6A0`jk_;h zk7A4N2NqGVQ!S|vH~uq3yp|#TDcn&4S*T6(`5HLXvArd95>aotRO;tmwVeo_7`o z{7#`^AtE}&WI9q4kA(^a zX+=gcXncdoYY|#cT*&iawkjq|G8S31nb_?v?)}$X^{Ozwqod0Z1Q?!#cq*MN8PIHY zl;;9=xDMdfLae;nMbok4T~dRKtZXp@79+kDPtfalSS<5xa1rzs0=6b+&%#f{>AgZc6)*In|qJN`CZv~Z3XX(7`e4~>#!U|IqCNY9lsBoK~tfg z_@JhixDb80bl+TG&-;w?gW480;rlWNp;#u}WmX(vIV<N?Pl&pX-NNgGqB$MltBPme0@RhkD=0H`Zs1vcPCL9D z+!t3EJ!zrl7K(_&{Mgba7)u1YO&rm#o>YhNvC_IYJ=W{mXtHXe8+y3E#@zaRqvdgZ ze$wDZG!BfomBI)*hazufVRzPW+d#1$B3z2nwUc2rXb z?luL;gW}J`uj*vfo9Sj*oVSHkD-9!k5|uVu5iiS~1lh>1l{Hys3R+{Lied(mD&vRZ z@tc%#VBJ~wkij-fDAlExVD|R9+Yw^H)df^eM*9@21lu}puL0A@gE`0ExFt_rfecp9 zvGXbyel(^5-A;|6UT7?;&{y`2j7_6!j^S91cBh)YZ0I;9i5wPW%Gyp@$N55me5|MS z+?(K~t1oeh3kh2+8;s*`*=es#YchZ@U-obH48r8s`^R8_Ni}VM%+}VO5 z_=2KZp2vzVayhyhPJUg%Z8EgSDa+GWhnkmtr;o0k&d|VFD6OW41xBg4c_+>gdN=7^ z@rjv__9{s&v(LwjUhb8Htc#0bZqZyFa9AzN6BbXvUX2vXiiNK1s$5=?mt35`B8?wN zz*)QojNaN8DgwX5J7>h@ln}Qv!uU6|aq*Bas{E7X5Q2F1r3lj$-#fX_L;IN zT(s<_lM$7>C#bj_`@X_!W5oithFMB^$i!l1TW3*df(<2YMg`Jy9ENBi4EU9Ww6^1& z47rMRu{A;mGy<49xDq+d-T)YA(Ww>HqqWrg;r>&vwTY_r3T3_&jKQ-(i*`Y8gtkyl zZN{kL+P6FoJH{tIPMw@(0&z%s3{&-vqCW-)OWS5pCk-JEfL&E53o&SJ4_Q){h}(8t zDyw-MseW1Kq{B$NC(7%tK;Pi~pkfbAr`MD3lY@pv`eG_h({jxh^EY6$g|(LLY~cPB z;2)nni;nn8$-4OiYNXl#fPCZL6*abT0URJO!w1?D%-<-ZY(#b(>sVcdg4+{FbB^X=VoSRPI@Ja`zDk%7(ui4 zaXAksPEoJ{>Ac{^1#_|d!A66+dx9SW=0qJeQ~_uBq{Vkz?0RxS0xCBrmAsab@98!Z zms2>b8l#>(g(_Av|NFR-?7snMI}~6(O62^q^&_h_8f&t~_lTFGFiR77=&UX9x@QD# zmG3CG^tZadtWiU6{uENi*dDDY!@ahe{gF3iK|~E)U%=I81X6uQAWKlrEp-nke}GZ~ zdWYa~$dv4Qzi3w}IWLhX+Gk$6B;?8ZE|PO$2ur91kZCc6R?Jtzz+nbTAy%&IEIoKc zo7CPdJqlQ?vIAXP8HsNb&UU8Mm0OJ&xf<<{S&S|W-Imh;Cu7cFS=%8g(Dc+s=tf;*`WE+q=`HX>s;gz6Rio~BhLrw$w2q!#PlIC#d z6-c>=4>NhTEf?J);bJjxkxXFB0IF^WGZ@`|yD{ZBVM?zhiKbhW;`G_4kR_49Qf>DQ zxZ;lh0PfHEwXG$0`r^j(ZN5KtQmB72@RCzJZsi}%uOvLJjiwjBHgXn?*1p+CP zl0%6Bb+%w{rGDQh0Fa50#J^J4{{Uq8n+mU>2dy`78}eiFwY&^=C1{phfn+1VdQ9`9 z7VGBENiS_MN>-s`UEgVVUkW}HCqM(&0RBrII`!rh5Ug6@ni zRdZul7V1^pbdt??yuN1mKFC2`DUrv?a!&IhTOJWUS6L{vv0?qBC#oNLOZNi+_Vkjt z9{iYa0DMfDn!rzS-7MbZgZ*K@>r0CXmhu<^?{s@j^iI_F>pPi@X8vT>a;7&+5kfN> zN{R8hUwdz7nYkNs^JbVNKqb-kXjr(^Ws8T~dO0lwhi$evY-Qg+OjhtbS!?XFuD0>v zcwcaXmnRhPlk(we9lUW&?YXAe0oZ7K*iBl&9xDH9kv~y0J@iH|IRK%#Om?P92+4Rt1wXw&T!y9Fn^k+=OTxfkarjxwTHz}qXa|zBVn``^N2YQ4 zE(dS3iMsWOSD)xH)Rv8D5_s6iK&FLq;QG4$T0)80lqJ?a)BLH0Yx|ex{1+;e0P@R@ z$mnQnEGM0{8);vXuPcbUEDFV1ny7?)*eB)U6Wsd3l;Th+%QkZSL?n{RZJ6X{nd#FBp<7m2@NV?e zw_`jT=@jyWz8eF26zI~>W}L+ejQ;wp3+rXFjl(-gOWhGVejBBI)QQzdG`D!5!=J7j ziN^UY^-radtU}5T;`(IxhZ&+IlhZ*#qWs<(C6PveiR02^eZ3_)grEUfzie`OtL3tD zZmYtYF9q>%GY|qxZl)M&GWFNUkxU{EN1&U%dFvsb1`C_WVfMP|woR|LUjJ2IzTMZM z-TEvCv#CD(;)V{WuawKZ@{rnlVJkQYo2o~v`Q*c`|hUeq$5YW2aWhT;TDUK_VH?~YonBoYWJ&nj=p%4 zB!O7|Vb|1(ti=RubB76)wfB)zVv0WGWRV z8h%v=)jE8Q(Ht|BUjIr*o$w+W81jc!5h_P&Z_h^=DWW)~{zF#LXhOeeAqP09l-emC`y5(&7m;HshVii?LGUQ?VD010*SA)Ef z!h6W5M8x;yS69RKW4+J145bhXk!rw?x%l?gVW5 z-u>zGJF(# zJpI9qu*IOkA^8-bn+%r-N4hqiwj@Cpqx{N-B7`=`pT^nDZZiJQIP?2db4B|5b4f^r z#AU8KpJR$MF`DZUhmlHWWb(=KN&V<0NgBuTwd z=8|{g$u~SK_l@E7=iHwA)iWMy4r*0~4KoiDR-ROs3tm;@`I>#vUuba`-0Wk98XuZ* zuvXVv7(S}yF=v=nG2m@8%WA`YDl?QW^Xe`UHYCorQc%A4JP8)t2;L!<2#y`tSoZPr zU5)_N*+)AGy7wtldOl?<0L)xi2nusyL;GL<9&Mrbq`>EhrZnPGt*;SWzQuqjw+0P+ zaW#+Of7`(~`%Rv3x+6%fRDXhx_3h)VI%A>=te5+pgx^^tu>d`Uxh!>^P{W-z@|VJ4 zRXwaf5TK=@Vd@%L>D>1E@S!w;%m$L0^aZu(8BFxD8xn`z_FfWubnoBXTRl=_1j(N&=W4(*G`cAk-IP`Sb&HUsO>`vc8zu!Kr`J^To z_*EnjjiUwM>2R7Bf6|K0B`tX?#k0j6jfkf^$x22hOv+yizW^pIv{FI@N!X z7nP$wH+f)q+x{i0)awx-FQW{a{kNBp0oOPfnEf;0H5A@?&yJD4XKW^X1A*3RQN@Gp ztQk&d!hpNYSs4tRiYN-_a2c*ORXDs7KHI^Yz zlQHa)w8d~jS50>YMG6uPGQ3}z(NN-`XB-1ae?d)F%2&*DJ0Z-Q&0@B4Qt0cOpC;M3 zB;usH6bi*k(hRsfTB0aPz!1_W@&O(Xp@p`aIO85^mYbOW0uxNyF69i)sM5dcPG9u! z;V_g+H9*x-6~}i1@a{(kz@5H?RVdh_n6>4z-F;;!IZm z%zWNwH9E|0$jH#7-k0s1rr93xvSvIt!BUB}dECEfp)P)qI~$gM>pa|Vw=tFo5Ij)TipB*OY* z%6;&c2B;xl84zzSU@cj7b%e74ytTh?p1^73Gh%Ylx|7(2XOi0)5)g_<<9GU$X79T_ z6MV{^o)Rb(*zHL4mkd<@EDTTHUmE%tj}?kgkN15Vg#kI*`e(j23>@^%P-60o&XDL@ zhrQlLkK2tzI!iWHV2nRP)2#=0-Z9KFU^vH$vFvqQza z?wFK@+P%@x^MS9J%2L&p-Q{pgr*|Fi{u-;!Vkz`}~LN%FmfN232E6oeL!>b>y{!yH2kQKscn$T6M= zGh?_Otqj92{s^)nPs5U#h$9YF73zwzeL~t9~&yt47C!U#N{D)#2U(li_c))f~lXH7d6qnmDemqZoPQA=@qTs{aMIj|S zdq-Vysrh)JvY2I!-q-O_A(<>vPLC~~o$)LY6KuszgL3e+((Xhtz9K&!TRvl@-HQp| zXRO6bX({&G<3(&=Uzf3bxaIw5_##xOW#mSH{Nz4pqz$sH%X0jfqenVE{#0SAC?P4y zsup+QKnzeYNPPdR@i|gD{xk%_*g9<1U)9NdPYEo4!v&I9hYRT*%LMPM?y%?g#Y|~Z zapR$zY?MyeUJYZENqw9m0zo2%SeEni%k|_UkVXQWq-@sC?36@L)|md$jd;Q85^E+) z70ezHFD#^G7`*Ig+EKkjfzM{7aAx;GWPsZ#2pg;(^gItcMVHCYq({ln=Gk{ik@e$nh&4pnsvr}ZJ@(u5L zZBkM67v+$s%f*M`B*_da6oa#?$WJ0UJk8A|wljFDRfc|-Ttp1;X!s=3Nt}_`=UUxd z?(dY|Xw%g>?xWI47Yi(8NnXlOg~QyU_V#X=)uZ(}BjPeu7nhqaXyoT8PZ=bK<)Pvm zjNJ?@vLxd}LIIZjZ!M$s2J#*>U*kY?PCca^bnD|iV^p{yuzrE(ah=lmNi<^!m&D() zTy7rmQx=fdj=F^(Mj^lH6^+`rfr#S#YLl8>SEUy z1v4x_ac$NU)J|ZXa|=roni1!cXG2es@)FT#q1daoDmQRZsmq#@Dx1}pDjMe?4OWBF z{-g%+J(~|y8Ujj<54r*NiiXnmZw`EY-Dk?oin%=-ty4^^=jCvC=sXb4FheZKh_}Od z?DuBgYk8UEk021y<4;;fi%R23+`~HzB+l|y9x@qht*^(k9rLKoYR~;}NP5{xWFH?tYrd)b+NOiqXI2RlZzI2&cjoF+ZnQ`bpWE@QdM9K>_$+^qF)EO|WRE|r<3?&xRjZhH(Z zF2M5K?(v}#2?(X#mcFOcA3F5N#)4a@GR;Bj3@a#ZnjeQlt5Gj+TU0@Tb&go7GfGY7 zc2EjMyIIadJ1kK85mPuD$z^+$TW&ldwWrjzy$;I>6c-%3=1&97yfoc5ENh_IZyms64!4ngy~Lw>a;uumwH}XW-Y19LY$Dm)1^r|_zw5z}7bm1xrIe6S^ZqtM+BuiV_rDaH`|;VI(OL08aszVW^jX55z# zc6Rr3d+^C<1ubx*6xziQSwiWq-yju_iDs=6$C9ju5qe2WMRI0mL+Bnyf*JaLKq$cv z!UOqI_ni-8A3H0wl$&zqD$O~!KIh*y@*_YHWI9n~QWWQ0$XlP(oEtSb1x`B%b9=GRsQ==ws{8WRUEIs)m=F~Lp2|udxLWuVHWk7~Bc|zQYoxY?Jo%ig z$20*`D1QCPAD0b>A|8)^_cDpSJP$P{SnYE)n*ns|<#^_CxjJY|#fSWLiQuv()SSAd z$mM#G`xL)I!G(JdE0JT{q;MGZJj+$i+RQ$>)y2S9&k-6|XI>dKqHzUqR9sRol{cmy z*L_|R7Ysvfl((A_6@t}-?1BDz`;dV z1^}9vdc!}u+D8bwgXP(!^qt3myX|S|_n|s1(s&^ZBEuNGb~it8bq)*3KeL;TGu<>} z0SIF#k>ir=)j6%A+9fiwio*tr)IC~wTM5R!*cGYyhBGq0+$^`e1WAW{FSnSQg^g%O z96xjyJ z=wJ508Ib21bgn#Sz4_udjNcipWD0B%r-ub)AI!RVuQWf4&mg+cK|ROyn!z!dsQm26 zz!HKW#x>7`mr@L($*Jl!WY3(p-m5T0|C!~wc%=E*f~`;P+=^{yY!JmbZ%)fPb^mu*5div^AN40 zUhg!`Md#}#L;T0#5&tcyA}qSAFce&7IiO0&6!=|ktzTIaxIJh@L@E~Ca3856i|yQ@ z;dMH(IeX6eAq?wng*I363NzDb61OPY-OJDU!4fK0Ru@%v4NJvqrUZoSmi~@uCmRP- zi>$OqbH2?1R}Fp$2q$Rg>g(X!`7HIEh$e2OVxcK0&!LOMMI%1HCCv`-n$SD=yG|6?R-8>+miA6fg8@8jHqG#21wf`|EAZ*SYdjQ#jCPzugBb zKNiR`CSF>hXBLVm9vH?;M8=$mTxG{;GOQY~ca44Uyp>KTNx*w>6qS~zn~>Ly>|2J^ zySpmiNYi(3x^kzuJ-QvA&DVA1-+~8t4xj?i`G&vyA-5=?Ad2=Km->F32-xKzLTaJO zkzXDFh(OP0rAyKX2LdvF#P0$)LKTgwwcA1$Xq|yoqOkj7)C|g+pYZph{jORoDqC0J zs}>sSE??S6UfKgMOcC-wml@AwAFN}S<7dBX{`}(H|4N2mxQhjnO4w&YqW_KLiNRB0 zLdD7r-P9LSVm`}>_K-Pe-W&lGJ#r|2j`?B9`f9p#UgMFY@O-l*6$GZi*J6MW?s~W5 z^4kkl0@hS?tLM-n0MQ8@Z);Dd(K9zBliaBn&=d z_pK`-dxKvsys8{#8tjVs6&>?N&`b5^WKJy~4-^>5+y-Xv+OB0tQ>{Jid^KT|s5!xw z_9z#;JL&;R94%*ASY5J-FQk@de3pvdpT@d|NlaEKGV1WPEKiP=yH3i{ zh+uFeo9%xHk<(1ZFIQ@HBfE^^x4dD_CjW=)^a)0at|0{wqKUVOq8RM$d?%#a58NYd zpW)_rsx+qNmlUHO8|9z5e^CH7&b&t*EfO(XD-A~&MO(aX zx0g!C5|ud?(uI?!KC<044L=wU+pM$lFwPbOvf-WHGCxDngVvBD8LU5Yd2ECs-Ez}a z=%C%syv9=Za(jM^Ok&g6cEjP5>cqDW;dGiUm}rufI1QIf2ypn%skgbli=ZI|s%y9> zGs9kQ?x+(J42=)g$(N2F0a`G%FL+wFW99E^YI(!;=BhM-^d~R8&P7#o3yOhwFzuHF z>eMB+1yA56f`B{ovw)+*Qp%pY*vvzt*&y4rqig-N8PrsL6y1rB7)GNLPD}uHPGOq7 zyG~A<{--N67v9j|LA~h<3*>#fN(gvC=MHmWDJNe~D&g@$B6#88`RQVfxRz*>_bxc8 z$Jv4B)~AP}Q^2OEMuTb#toA7%pGwU@j?Ofwa9n2wxD#>23*OaiRLkkwkp*d0$VhD{ z5eaVXdBBZP0HZ6LqB;r@Jhda0Wd=LsDO(51j8ZLmC<`iGQ3gj_Lew@N!b}pY8PUe$ z-HQvfW*QP4Cd{16q=38UW6RA@e_iAofeY~ zoe(^_=1ux{I%*K1jNvjqcH^f&TaE$;?Q3PMn(d+L6Dy2$edl!+OF^FA+`U07KsdXm z0;R&$s?+eA!T2U_uZy>*c)4TrOgABU7`KpkfJZNdS)-BEd3uMlq>4bf4Hh=CfYece zc+T2GX4IXTC*XPqy?^L}x^S*?bYq6WnX;K3&-df05T^3BFfe@09S75O+uhL_rhlv! zLQx>_h{^4lX+ZVU1%8C}Atzn2ZsV3_wYGUhbMzIJ9}ZR&7`bpV8_q!Kx=C5Z3L*Nb??v;d{aJfUx8GF&~D}JKU&4*4nc7v z&+TnUDKi4mRK7=bR|TRhQTu4g!2#bZ8;oIGA@eWAf~hO~{$Tt@7^X>5DmV%ShzF-0 z9t>jjUj9DW4PZpXIox+@SI^ZWSGau<$Xd#H=&&XCW;?qfcHTWFdC7>giCY+%p;MT% z7XB7wzlbka4}ooID`I7}!5YU}sBb*2$}_B~wjZrKHY*Ap(uDI*sC^2uv1IOb+28OX z*xH>=WH7x4uN|(DTumZ0t#!k=)SQb7^#|iyHuQ4RffCPSTuG=Bt{Az3CUR!W&GG>5 zuM%j3rvscYJx`NY`MPRS=dwi7zPJ{m;&aPoHy-54mL}rUO6=K8A%?x@dLRHlgn#DarJF z0^eYSt?=)ZQd$c(G=j>5M1Xilq?tZm+0PRuMB;pjKRFR`nGg+*=?%-W{p zJ6r_p567Bva0xrlP@PGZZ{8W$b_zL(5*)L{#jQF&z zs5xEw6MmKi*m$%F#?DtLp?KE&19=c8{UzetDWwHj3_mC|cWXOP2BB5;ut-TinT7)M*FAT1 z|0)1akBTS8VRJX~>F~%($7^UD4DH-g-Ge)0PLnW0g0YV)`dA=hVYr`Cggi1FXzed@ z6?3w8#|Gbh?#iX^6(bS$g0@2bQQ|FrkK-V+=v>l9V-mI-^3gq{)M*^?G?!d-2}246 zM2AR}xH@4`rx{q;n*(y(zQhB=Rd2PHV9`8|*U zZl(vv#L+m=;pP=AB#KN>%xfmbi6Ud~K(b*}+D*}%`Jxi9#HL=fz4SERjB z?JYug(H@*v@RE+t3EBqR_H@C|or9Kdgh9)Anvy6|i0eV6{4IDx2{O#SRI5Hv0Z1C* z+%q#Q@}~jR@a>$g{F|n2!@g$72dZ_BZ)Q}uCOUS(qS{9-?A{m-eFz1q%Qx01KV4QA z2F}3+#i#Cdp@{d!_~l5uK2EUG@98ma@6Vw_I5p+!N>V>cWwa(bfj4c%}9)6yj z;5gy6Os(yYax0ae!NWY##B9d~*NK9U35&%m*E$#imsl{mkp>M-d9c}i z(2j|bgH)h}FPG~wa27RF#7ol?O`b-}{XsDbZj=-!%V7*HrWkC)7ooQ!(9hf1_W1=4 zGt~|a6CVwbpydoET8B4)19ZwCF*wLJc<^o4^0YOS5KY5!u0Q3K_>vn@wdTu$5d=xQ z?;bjGh}b&wn;Y(3FqUb1fSvhWbpioMh?i63Q`v$A|J7OyiCodi%XLL(b|87zdJeLT zrl-z1o+-box`o!vXc=TYw2~>|TOdHrHp_)%Pj?IDNTX>mLDv_N-+&H6xKA+}q08sj zbSeVFx0x#d1E)Kf{h{Xw78cmcIgZdw(Yg<$7Lsv!p>Ch2T+|3_jdNl@vU~E z5hl)NI$Axz>0r7*Qh91T#{XsqYTQt@#=dZ8`*LSVL?GcJs|O(l-0>8QVuu#bW?m1I9?-yR`6xI#1IM!lbGb&SxXH6U#FIg{<)~?y@>y z+-Q)L7NN?_KaK)L2dj z^tSC{zSb;R>!`_@w$(c(OhIzN+ae)`hhwkLp`b8b2C`4?5W^nJGkpduanm0Mol(Y& z#Gzgq`p5|K3`Sx~IhBx4Gkr&?(@#6TI6lYR&R;}Rc@MWCvV0sUbKcD?!nSOmzy3M~XH|h!M#u|H@rql2iOzL2TiKVhmoclgM1~b0jLCha_IJ zbk|T9rAXlf#r*8NDqS}{@n&AW;K?6@Rx{n=9<(1VWNyy($?-1cYT*7s)VPC6%x4=(+M@c( zep5%kT=Z46Pu{!j>YO|q80ZXuR@J-<=z9MyFsz|iMe{AQ%`P~p%l*9G+XyjRNGR39 zG}A{1dJ#QB&gFKqYv`%{a$1NQcLi!%Y&j`Khy)`D$o*p>{tB|ihPnjibDz$H;a&7( z?ifN%Fb<8~Db8N+@yS_*SjzQM11hzz%{fh_p|d2Q7Ukn_iBWvE{FMok7Ck=WZ!_r` z5af_}Vy%no2pO8c>Otdu)&W1)ZV%1d8!i%5huL57!}6X7FU~l1K=cTgp6NRpXqR1% zkbxd(5Q4YZ{u!SMO68|332|Mhp2Oa&Li~&CLP&4^Y*V;&hOs>BM|kYn+L86h8jcPF>f%0_o{(yr{nsEi#E_FOXG{?_>LJU|O9p$YwCunG+KObwnMNgFJ|yNG2*K@i5#I0oLWP7#CI zms|I~$$@^BxV|`No}T!(BVKY=QPj{(ZTL_v7=B+S^wK(6x~EhSg9^sq#i<^C2cS#7 zR!S|x8^^GrK$iy)-c}TFByKqC=zEh4lko|cVB6TlbDg)3V> z?BRw~33?>E{3CqSO@K(@e$MlZhyIZ+r0pYOyCPrV<2pe!xS(g|asefSv_H~WWtTo! zX;gK<>hBlz3F&=C=pX1ZC5U&_p$IDCD7JFQBm&_7Y=u&a^A>CIt&t32Zp}XSNaZy8 zSrjqch4XrtTa=`Y9d94KROi6(;2m`WRLBB*33>KM`qAc)^bjN!W+Xy)DI_5c#BHBQ zH=GY4u(`5*#~IX81Ta5LeYkk68ctHM1HZ^4 z;Gme#kuw($NWD!DI`@ii{96qi~mQ8Cu9sgqEi9l^4hTZr5?l%6#A@Pk&G#h2l+-8f@ueF2{x)zA}cZ2_z(%`QkU>*ZozkFfi(I--`s-B2ujkbi5jb&Vc}NqP(ymV2kq#;sDGR$N{og2qpN=Za zmC`%^p#vf1VN=Q#swwsi4${?@9myx1Sz;0sGg8~3-tZ5C{?3yXtlY?Pxs%gVgSk9* zPgxlmnFv;+!5jXUhccbwh;cY$|B5t{p!hJ1-_dXgr2Nqt1CW}9!rkZ(D@I;ICa!e+ z_1`b!KnH#C-g$oZZW->M!*N2;Bc$F>U#;E}m4{3mD$UJb@!V5GmMOiPzXp8~`lV%A zMYQq%^S=OoA(sd+Z)P(Tnhg;qOV{@QRhTzRLKUzk579LQjV}7u{HY`+23QD3H z9=}%zzm7mXr8%!*t&2l1N_!zPqpY2H+W(sWQcv^0-_fA(-)a9BbdcXey75fO@IPT5 z^#t=WO;x)88{0JmL2_isbIC@Ld z&B%265EzO_|9D!^^pf_c$d=ooeJsO;}eMmzk;G!xc@c z?ZKj{<-YmC3U8g&cAhD!>$?B6)Num-dd9CQc79$X3f_h9dL9htASw0Xo1R#BkFRQQ z{no8z{l!QywEd8SS1TJabFoUVF7UcMWd8QM_bFJ z$;|f`qm?3N62m*>oSKd~?rgV&+>Yz<5NYDK?A?jT)KsHt0r#%(_078MZbeQFQWM2b z8)tc}C*U0I*zm>c@xOZNAm}HUK*gy3g~_{AA=eKcCb5gsiP=suyFKR=w4@!_iBWNs zIc2Q+;Jk{`t#Lf=7>DZiUZ5I`{!Z)0lb$3`wJzI!XGCJsqzfO~>Cs5*TKmA9x;J6W zXk-0L)b7KQ+A6_M_@;+5bu7A-uX|LkC`fPAIO8C~Bpg3L2x9pC^%jfmJ`*hs`b?Ek z)A*?E;k}nrJWIY(f$3s_@sS)V3Z^Sp=Ag;L>LxHC5|^3^EfQ*q>W_r|F?xZA@*cBi*gtuqa4ZC z`%4&aD3C}JxjtPYNq$Mv78^7Jwn$JuD=@VTD-=22+RzP!TY!sdx!ZC(iSS|mT*P6B zus~Y>O0bdFb3?Df<1oZ;{l+u}#Qv^UwnbR*yk{nErzuzXWL30rb`ZcPcsz0`2DDcY z1(qj93zKLh*^X8ljL)b{8e0ePKkD>yXWGVG+1@nQJU#8ZvW9UT@=^ao^4M#@urOz0_@JX{hKKt71 z_QY^H`Sm&_ICA7hrlQ94E~K{=TC|tVZh1vio6NpG&8bA=H5%;I1n+of#3DG)HK}@Q``!n z$0yZmqA@ast1k?ulb@I~hH=(o55<#{lVk4&s_lO?+yq}dxLxfy!j*PzpKU|^i6vG( zXf|sgz3_=^!4I3$i(*8{q}3^;!i=3|CDyI8+vBwC2*(PMU*HFvy6+_cD|{D7F3v*h zLsqp?45|s!5=D-|1!^NrvcD^GRKOLcn~TZJ!;b2J^9R5Ejg0@B>5is(VC%08mQ`?& z&?~Fxtm~oHA9}x>8T^Fb!w*fwfb*H;unKFv2_*US0c zZ}!73Q=d){+O2fVuJuK03|Ow~RD{!a#*=%o9KQ7pOK}ZNK=rdGRFN^ip|la)+tKl8 zL@6Rb9F#un5`bpxi|`t7A67zD@i4+r zdYD({Fm%KT1;IP-Kp?8g2RR73`Pxkdjdy#qqjes#DTdg#+VoIOCn|)xn1P-%Jyz-FL6tIOvs;9snnj>;5QV8{Mp^?kwxlh_BXd$m$6AYZK zHWqZI2|3_wPG(uWY7+(pbap!~OyuM2WS#q_zLiEGL`ydHVn4Cgut7s7oG!J0$X|M% zPQvf0*?i;hdG2mgn67m7YHX3?84A`r95AM=;nhYGC_u&sJ&SK|w&uu?60$SXPbsx>Ga(@38yQ)KVI)HcMqU~HZNQb<|nLX#X>J5woq+bu8UC!L&D-& zb=OnQe1{^v&vSD$BG)jZWp8AlPV((Tb_}bUpbe#m9XMhSq>H?LC&&;ncG&9p-Hrp@ zi5f4{?h>!P?S@p827)eXUE3^Bbp3G@6-1n_GS^+5wX5gse|Rkj8869=nxi^4LtQzI zLlt>Jh>I;d-2z@K@1H588-59bQwh7fxf!!5XNaK_z5D5Kv&Za!Bl@r3BNZ#A?`~n> zf;&g0_muN@O$ktN-veJ@zQHXxi%Z@*SrM4}(w=Xwggks9OV$=g6nls2zYb@HY`VN2{R{gyZ=*pmpKw>hy1s|LAEu_n z!An5;~(EzkU0>Vfa!flTuo;hgxJ^4;!7o ziBMJ;&AO>^tcL!lpnMr%#-j9mD-`e_ms3-vn7~sQ#0~>z}Kn?3`r3je?(o z6rd9o#;1z@YWJxG;_Fk_p16T*8kfG9j@TK|z-r=|$^!Y71lALi3HZJ9q6QbGMwqxg zdLW{?7;r_re-DhruTgkYu%=nBs!A3d}z#lnxl@<*4_VzJRShm#~g*k>< z{wK*FF+Pt((0wD82be#UwxC3i=n=>xZfNN?O-3rrfqC$HE>%Nq>~iJBXK?suk!3^b z&R0u~#TiacqlNciZ*M#b7{Im_}}>Yb1F%2R={OYp85t*$>~-O||X zy^HW>*^%+_V&~Ah$cM_|30b@pu82alh5O2_b$L(#Ne4JcY941c{}E?-l%k<8tS{=! zvV1;S#);{F!9#ts)~52oR7y;I`UStkt-f|Nhv1n7^@OF?C)&kgrgG6Kqz!&((s&$M zfM9`GGuA%DjY-WRzaq!!w@ebv&Q6&7B2O(JTgNIl z%|2m6c9kR%cGA}#FrcsFuZzhs#FnH#i@!8tjjKqkiBr=-|NNY}roWjKX^%Dk1@F9| zu*pDGI}PAjCR}a3e&T6P&R=;RV)e2{vFKJ7lO+BzX`+Hf93H>hZsmH_^9VAb{U7~x zBSqFG=F?>f#s#wL(hkiEj>)@P6Lg-ybhL zd)D8z_Oc0vRA5Ar%C0VU7xZICUP14J`OY^>XTq)?uBh8;F{g}%vjYnB#0ky@jx*9{ z(80a|p0xIfT${|9(QG4+eDxpg2oUO-9-YyG?8Kd_w|hZ*X4OcYtw+Mmk%Z&T5^SakoW^A{(yHHZVYo6QP`VS9Zb9~{7@a)h2XtxekL+_E5fUcHB*#2Ov8c7{+rl+7-PtA`6&L*ZF{dO-Xs<(Zcv_mC z97MX!Q}iw6h-n6H@r{6>bwhRRZKivlf63?GQ0iPIfvXa?E4oo>K?imcl9^AGTU7-~ z%IrDs`2<(wB9%$P7(=+>G7%!$RmZA~kj8qYQ%2gz(>)_hlcS^;i?EyuMi@&9WKf-~ zD2K6AZZ#~HbaQ1w=aPlW+;@kS7yVL#v-H9X^pwps3}0vD+T_tc!eSs!rQXD&I^Z$T zBV0H3SLWSDqec*j;xY1p&SpHlIojneDjYXgRSai)S@qCOK`vJLNN8 za+~uy@J^3G!|BaP^QFbqX1;a!;9g=Nar-3Fz*WSi&329FnO-KEK%p!KNkfm_q(L}q zejoDQqs)hW!uG3K%ljVTv7KPlVkor1mq&JX#pBh==Oqs;ou#^Fy-HgqVjZiv3JWfa zv8upRlF=8+|oCrroe&mJN z2B%uT8T(-Eu)6e&o2Y8yxS=Nub6rs7Sx`8u`+U3W!FWh-6G9;m^?lG}TZoWgk{V{+&Ek7UEtLQMHd4#-a z^pD;0CT4B zcjq@>Re+qUP+!fcS01iwa%{Bsn6GxkT~-zSiQ;9Xx(biv8AE@%5&Vk4p& zwL!5Z%m<^J4Sl=p}N(iwM))7$&+R+#2N9M zE`iw&obmari<76-2H{l%sQ_ga*lE_>iLaP6Ra+qL0qN2^Wc}Ne%#6+9%rQiO%&CnF z)X18`@%=pN2+?k`la6nF$I(YCgay20c(Y`E?bvgR>mPk!-W;Tb=yb|mW*w43+XQ}FUPLTbTtRTZ{U$Ag|}6O9lQjzZ!q~!kNG7*G-{$o?Ca1m zsRyj=Qk6TC>P?h-L_?wB7Mc!*Lp=|JNc2m7%y6q6gj(ld4CAL#0lQ)GK?L4qucfJf zGv@u17<(r3AO(GK(4s@rspIryPxa!nnkpfE>wfn*de{_FO!D`-@epR?71l4wuU4KZ$FN}f&?PRW;vr~uWtF;7@%KHI;#1+sz~8o9Q+kS!!_i}52u z26h-)0K$5!2|T2nV}F&Xu#K@fIUVU3zivt1AKTEV%uV3@YSt{Wq1cFWXpCN{jE$X* zoO(rjSVTFs88++Ws-pZHM`>g#os5(Zvl!~dwngbE@`7f0`=JT05cbk5PziiT&gNWA zO7d7fhw=e(>_@fJ2{ax;OhhH(&HPl(5LxpT8j5gyqnk{xlvI(eob{V^49aqo_IF@az`^K z^%yGXXoYhiI73(_omg%DZU*6*Far&iB7BBPK}0r@I>FzpLtYmAFu(K$PG%Z9*oIet zd(&0oIss=e^$h-v)oA(G&>Ae{*>rm^B@&P;$|>2xM2A}G{=%T6v+V*iGShT{1}Ent z@fav5?7-~jQ@%0&8tjMZRb8}Vb~3dwCN?6C;>H8hH8;NYItQE?Nn11-xV-Q5Z8x^O zYrZ1+6H|Q_1BXXG5Ozo3+vc_A6O^17il+~w-%O{bNJF0rCVYfkywRX1Lm@m|_x}9B zPwv**z(LE`%kNxkVI7vcVP8MNh%+BNLNEhcAi>Wz`(MBXbf0sRE;+iVP+;Y-KzOZd z(-hrP^om2e{iYs=_`#KsG|qkaL9IHwbg}FuoBh$-=g`2v?143sExh2jHg^`vLHM6e z>ESP3Y?jr;TkP0Bpld9A<6eMk#4s-PODFY2u>NR?08rYw`7??F!b%G|l?%z9O#e%E z1(YOE7h|<#jt*jbbUWJ*9>wDPY{`6-H2T?8bYow5E zE(6HEU>FA+A&hp~HngK<9llqEN*19Qr5+A$isWT(MfsCI61H>cwjh?VT8E4Mj*9u^ zyYY%(I_nZ9W2%Vdz&1WLEBU+QR#ZeJZuXp#qAwMb@PQ;5UGT_4`aD;bvZAmn!%uvR zE=KYt8g|m2*iJu}@gEc)vib%GRtVi$Tf`6!B^X;*ct(_uUfa*7@3oSNi!Td5@I%7N z`QW4@Y)m3?jZy^j!MQ zCV=7i3BK=NltI+!oMR$1@E(JXb^M2mfcB@5-@cWW`vfWD6$kHBq$isx=nKeKZP=JJ zUzMq?)z|!yqv*@mF)(F&WM`4NdPk?}Rxy9<%U_WY+}iN+_dGrj8saEH_cH~O;;B~u z6PSbryuN*7$u(7A(*r z35OiAGp{V{P@UvRynxTHH*iZhrp@|vaa*a~8V6bG9+YVB?X4d;OhD6c{6O;POzscPY!FlwneB6^BG(QWm_1HUmsdn+EYOyh#JiaQoP(I;Ro5!$ij08hPGG=_^>Q&N<2m~HZ%}{N)IC%i|!w4 ztG_ubUrJwHGF|!CSM~EW5};@DzGNuRrk`s5C+GM&vUQ*33ZalkO44Cm<3SlH8T%+x z4daBy?^ujNMdw``)05zHQbx>e^=^e<;H|>cW)a7q2p`Bj2erwkVUfTKCu7$g#z(c{2mZ45%>4Ezj;^fUkCrUoS9C7 zJDffDchCPbDTdzJq$$_ZpP@jwU&mpB9jI%);&?!RYHtd7ptgJb_kG!79C zm2}MI{lvPW7^9!^iI}>&+5N$J`eGjJ_tx^tD!6`(#FJl_{8J9HL>m>KMBj}0WcqT0 z_8DtuQV{XIJVev`{vzX6ZWDfTzAhJfAS|MO$wB@{srxA?QLw3?qo7EWb1IvW$B)0a zi2H(C_=g>JenikS%fVFS{@QVgVtz7CeZ4>8-oHn)$N5!?z=Xoh%uS&ECmEXY z{K2{2oa47&WVZjShs2|d`t(VogPD@^JS@3K;3;{x+y_FDxn{&*LO!HtyYIsYP5 zH687Lml^aw4ieGXlpkqUk?uz=MK%tVoqK5?+1bQ1v)~i@%DlVd{%&bh zB|rd?eN0jk^)C|r(Hmt+oB^03TpVcj3IEDzzMPBN{Tc^hF$~vsB3O70jb)Ra zIGi}4N*d-9|J;hkO1H1Qult~Y!7uqm6=pKH{yh`mjOmhNAm5Yvn~S)=svHH~FHSq$ zA7ZqBf4kQv>7Kvp5#>095EKCW>X718MfqPGWFY@VO!I#p z^#5;fH1ye5gugo>%8r=2Vz_GZb5Z5TJRgae7XGt)4u6uVP|VK)sBN7E*_VSjWc~ zgz6$MqxJsvyE<_E04_hjXc2mPo$`wEjA9~m6gAla(7jxXQ5cdtsB5TC>`^E?OrZJQ zGlF2f0EJp8Mb(1%)4+yJxEW?eW~IQ6Xky2E#QRW^LY&-cRJ~BHS!8TDWC;n&<+rgA zga2=1ii~*2O0KtmFkvy!+6UhwDY@rkgvc;E+|H3`2ZTVt+Ws?(VzJ0eh95m_3Kl%< zF(ZNi1%O@v&@BDFTcCDq>Nx}aZvewLD6MIIbW4i zc;oo-f3RZuxN-^#OjXdLer^UPmETHTjV8)A3`HKB#V_c6$j>)#?^P%T76Adhd}t`q zZm2!BtMng_qNzRfUv_hH1}VW-WxbH zcq%vB97@W^da=Lz7F^$*?LQQY_&Iof;ghM2y_U?$u<`WkQ%WhP?`KbtO1(s8&C+fL z59XUTd^wxj`HlCJeII`Ml|=%XqI?ab!BZWCTbl^dY0-lY3kp^UGKV{xIkzCpBy4RJ z7vE|Q?M}877GR4#fP=^S=R*z`4uoQ^u7-#E=a=8d{4}vBqM}f%#Gd^7$NzK6|JE6! zXo4@VsMzb>AS-J)xO^3de;sN@UTt)TMVHxp?ENlnThK8g&(}fM;B-|N|6EQHuk_BPwix5ir zADk&1SI~olvZ^HQ88Bo>F88&DHvFivub9M)))NGodr_zz1wDE4B-=SUAx>+Xn@wFK zT|5Bsb3(!^)SppUVoQA}0O00Oy&86ZKdO=l-nTmq*3d4~v$ztU-g86U8n^6kdM)E* zt`|evC5BD4vhz9CmL9uX!Ml6tiqB^otKSuStw*TD>9we|Sngidslb?bEEitv2gzQ~ zd!Kh3!EWZA`q87oEq5yiQ=?k87pXYz*Sje-vI`%ft80+dbJOd&4N+@F?Pjv+{bKUl z7WG2<@;eGP>q3*u)xDZaA)I;b+3Wpgql5?*jBU8l5+9=^t~EUmlU`=;=|1j{yRxD; zhWX{t=K6TytI?GOiP@%O7#fe`Bv;E|i_g_dp552BVFx#dv)(B-^9`_bo14SuXWRY+ zUd;tHxv4(m0?yl5y@HcRc`hHenHVz91i>>|Jv>6<={#s}AnO4eQ~z07wRnDA;lxSn zVi!N-_e;?TSf?UL1;2S+Rr9>-dEl|$*=+^O+Ub8&Na0VE3Er8kTIp7qx}smG!62QT zkws=!l$A89lw^Qi6J#OBTa^|xmYg64=?{-#l zcYJ_yAljoiTkuHWvs8bvHP{oknAB1hgS-mpPovxanPs<4_IB4D_l@4GS6@j5TzmAY zEzEfrIA#Y^h0PZ4_9MM+F-32ATfXTJrU}||+)NMoSRo(;p6!4B=Da`0f8cT0DlX`L z`Xxoctwx#>@2(|3q z`@)YvGn^@v+ZCiUhr5wi+Nsp4%m|y&s?3Ec0e^*7F36W?bw&1h)lTlb6(k~cop{q? zPC1dIOzF(%1}1tbvtLfjY&B3C%H!A5vfAGrPv?&=ym$fzjqF+IbiOqkQf#`uZB}7TQt9xjTF39KSdb`?2W( zC<(I8Qd-tdKfQXyxtE%D)5rB_Ev@Cq^y*47a(ZTB=Z2@{W`caTo!4s)qt^F3q*?9t zwe2D-0_||%th?D_&7x3)9DW10<=icn!Ndf?jR5t4uZDAbAdjsuLf9 z{PY+A1@c&1Ao%ZsjUw^OO?B-u3fHmOHa(ooxC2Xza07GaU3ymVi0WO8eKB?R%riuabgygg26H(LnE{ zi%z4vbsef%XnlRI5zp%D6!_V|$c;-g!_$vLC@Z6 z*sIC6zi7wcnDbFUKtNd5@^tZGVQ|<1Hq0tr=j~rF4l)r8=WCnV~*DOcKqg!dieMFMU2w@|jVLZ26d673`yWPEtjtk*Qm z%(F=Hl~1Q+A~y_#EgPCV&I5I74v(%6i_>OWe0CbDq4U<0-_e^cR&#AQ=c}meZxu{i z(cmpYANal3Qy(Fr`(q(|#X3P2Z!BviZeUOa@TCfz2BZ$@X+f+An=)IQ-BL>IWhajhEP)T4Wo8VF;?&^_9n%dYUr z|B8NM0P&_}fwWL~K`o6?Q$Jr3ve|-nRNmYjakL~`udF>WY__rPEe+J!7(9_A=4xCv zYKF0BM26qFEgg7k{eYxr@e{`4U8uE+Z7tqHXQTIS4`;8goiRl(LVVaJBaN=t&3JZ# zo}6Jr&|Yo&KCAYc#So3}kQKdM)-9}f;c`eQ&VD(kok&3D#jK$mH(3 zB{rZI>&cGigOEmPPSN<*hdv&8<9&Mj2OLYF2RBHkCpv}P9n1KTsR{gBJM|Ut&($Ly z=j(SZKb(2FUM;2H-hQ}Ti4ZpWgARJm*+8;0SDgf1Jv$miL2#~@ychTg+sM<1d}fs z&S=}|zc)S3s9EL)RY?QLXH|9%zw&RdMGO!X_~x5~#hT!|4K>lDw72IvrDAPx&)qMa zH6!2atPj=*xViDT56CTk6n8&Ceo&p^@&MK5h^|- z=j@w5i7rrklI@_!J4V-oTyUAA0q3Gtv(+{=07WsZL4wQvgqk7hFv;k)ZE-~0WkS_h z26{Td=GDG9-DXR;(w#de6`LrUA^mavwmO!dfBI~9jBN@8aSV7bW=+uZE*8?5b*Ew- zzjX22a#NUth$NHKjNGrCM>*)J;R{s7ny{7YUm^!}Z(E-GG~Y!wQ{J|;T>5yes0)L} z14Z010$rsi_8#xl&5CV54m?sH%XzN9a^9H2??|`6p)^_sG*y7&H-Tl)%$tsN-sKuIi#-UYv(e30)>x4Tu zy8U=k1wBlY&QB%N>Co6KztTZiyMykP=ReMgh{HTI-FlWMUtg*ZINO}6mE=F6hMyCTam0J40+zlHzpkuG3?IWU3z8(ZXn)fF zjOyPp?uU$`0m2k{e;VR@asC~ySOfrqJ)_!BLVd3Tf1f0e1H>;ha^>RpxA$)dlg@*T zC6|X@HSm81xu0HOw*OzI>&--O@%j9WUHbhbYo>_V*c9VGeylT(!M2|-Ki0)#KEp@M zSD9w5+mH{C^;-t-307BG!l32zg6CcCNv3gF@22Yme_P3R*kfBt;!ad#o#usr!h zTT}N9;Gz~zI>gmDdgXL05x#oKV|}v5YHP4iXy{i4i6}+8L~!0--C|-48T^=$K?V5D z`5aeCeBdJR=J+Jgb-`f0sX#X>F3$Y&k|3s>*`0hviK_aKYSd?GNlCzN@`Hp>U%Q==+QPqU+N*-sWWHM{diJ%05&`lNo{KLdJ`Mn+aRh#WdcwBy!jV z`sB&BOirlUj1))@%^`$jTRwA7PG$hQQJHCyY zsO7?5qUpzU5?C^xA%G#0TczcLTe4toqR#v}ZA>8$d?Qb?Ml&+f zE3n+2@bb_zf*{U!xm-vehzJ(}oTaysGHVSzdk5(|8RRtJS@PAOHHYZ2Vt z8!Z-{dOn`f;Tfr?oAW=4olVC|vEmf{zC;)Z=$7py3f`^pW=`_#jJ4Dl`Ls25^1SH~ zb6NYApCarr=xjB=MXbG@T6|jOQ#%cCgj-q9RF%{1E9|$Qi*Cgiy2zARx_>D?P89*q zEX+2~YCrXwEibRwl^k@=L<4>EwoEb75gl(|ttZZ%84YAMfUIipxOKsMEN{hoy?}ae z6q6>a*`}qLP@CqTZuUH| zS=jF{^kN1?WC+}uw$)#=H&5#Q;!tunjEr|+#>L7E=oq~)Th_l|SjRfL_M(YdCgRD@ zFukkiPSS=3Yux~^vSV7>M$HcRxr@iogQ3e6lw;D3AUH0|h4EDPQv@H3M^B!$3OL)? zq7(D5$K9nsle|RQU^9_H&nm4KyW&j%3gX*FjD<*Oo}=QWv-5_3wP@)<)Z*Kn&wJ4m z;K4^vL|Ty!Kj=3dEV>M9*Vm7`W#J_=18mvV%@0e?Hk!BfH*Oc^ue?;?z35_{I(^z_ zT=eHlYG!R(CWs5kVwqr8?=GSSQr%bGd{|-ObC_Ui9fA{xk--}lJ)R0i&XOv8MeR$6 zCZC^KKa#G3Ol!#yfW{b=%KJj28?utRpV1hR@wgl`ws z6ee|(xlgv_lCOEWYG1%{I(6z{?C~{sIjWcnsH-6*C+m50F_~In_j>40DQE&;SQNsAK z0C@XL{fBNvKm|3ImH$AX9hcK?R|>}ghij`Roa(iGtodAW%_^2%ra$3f5eP|@7!_jno+}` zz0prFSTh$@HLtA2YD8gnYtV$2w59J`Y>$~F_?z|kCOW+vVp^Fo=`@42%SP&G2cyXi ze~r%poKJGWT>`JtSsLjNsKfU769O#e1Y6UlBp2zmagI2Zv=s`i)A_GZr{c@jQ05H> zg5hC(@@;3)?+9gmih6AQDFD^-dmtQdn$xE=^TPU?G2~zUDYy?ydD|L0*>-sia4H7_ zq_$<(0U{prAI=-2i`7Yb3_HTG}09Q=z<=!q7tRqKjkpcHhI>F-|Q!znYaj zoq4W4CciPwngPRF_FYdWmu4-*uvn{5^V*SFTqT;99 zeWfuZBG*U62DYIKXEV%bH|2+zYc4H z?U|EHccGYk5ei&zB-zgZ!sTm>S`!^(F?6#VKSQ-q?>HXox$TG8v-+g@La5vCsJ`2c z!lzmrL}hElpG60~W79zrtlkTI(G1Y~!(iji7Edk9Ym|$lGf{@>YYo{7O5&3P`-&>p zQmpq2j$-AOWVHb+T^%oI7EpN7ruqPVQSHgZyU}EdHvX>yLw#fi{(Lt#SZPvcxFRyb zn8#WNvFTUM+q&SfCLMw?{!t%Qpzis6x_e{l@`P5$8|=O4K5Ml&Cf4NakGC_YQ#Tj&PH;^*(*F*-w!wi53;+=cJ5K^EMZI%G&iIdrFJXAuUf}UxPx5r}B@8uHlMn&JTpY3>Phk zvjtG}FM)S>Sp21JroI>VoEZBgCbKf6kqOq?+$jLfxliE&(DwUUf^rzDoF%!Osx&2O zGCl?2Z8mMf)K#;Gp{1%(bkR;7U{f8P>u384(D9vOsCU1?fv??kfTt#TK`_wi9VWf) zP|W;o2NJ7ahB1joHu3z0u!6|XdUocJY{Yb;Fd~Ocm^u~%GSxfci2$PV?T7T|If)NGG{?bDP%Hk}#MO!VX94MaG3k4O}3B z=z~QELK-cL2C({(;F4pwf8HsrR$%+=lpkygQpmO0+e~Y6yr&SW%=)5&{|3b;7-10(b{nD`np^|eRb$7x4P~dx(JgMjzi}rJ`oz33Q$C#ffKmN=ZDiv7#rhU3|DH+V93q1qti12bniHik@|j$e&Pa#%A;r zUsI+ycEmld(sNcf5_zJO}+jtmW~);xoNH*O-Pg9lgz$XI04 zrUzTz&Ly;X)@QiN@U|>DGkDc%zOmhGK2wKRlqB7}qRWOXG)5-7zJa-y$Eu(;U@Q>{(`y|SDgLw)kV`%-5@V|Xs9ym8Xo-ZoJrt3vnJ9zWz$S@ma+D+GEpK{ibBdK zf?x@$qXPtbOGa(Qb} zLsY*!{_LM|QY}3j=)R-fjTCL@JyxzC%_*GPj!T!Ru0Kt6K)1`jA?Su!)cO^ZAn8Pc z*&LsjK2E9B*Vzv!K5n~ut}n^$P69qOym)4Z@)@B7bC+!^hx{of85qIfYOFLk-P3)1 zLW+Ce9|gwM`;{nTn?Hk$dgK$C%m$>)aSLWKQeM~cy}`xhH8i`I1RqPa{VLnc2>i)O zWR2*pX?G^l6*@?Ti1nvOWye-{I+XiS_Sc=uvhi@A$v2;fex{plXQ$gAIDB^DlZFJl zkLFnk&&9n%KT&NLBBa87bT{mY@=7D6@L1CYCUuU>D#?!WPBpj9BT?7>J+X2|7MLqu zW;kSmgL{s9#Ehfco&I>5Z-0GKRox3-zUkTcO=BYsaW*R}1A($Zy)$ZyrVjR<-lv^T zg-}nIj+x@}elFR7LDC<>8LiQS^w~=lS_@|4J1Kl;%~fD1E{5K_m|j;v|58N5LgRhh z`k6Iy6G?cJ{SO{Mi$DBce&2z`2lu)W1IfswF{S*YLhzn?^%uNHfAY?95v_=j!?8E+ z3d>KbBO64^q1|8Uw8yms!EO*&*ZFX9bbj!xw)(jWl%Y7#&=bJpwNcIy&bT>GqST)HHeEYI>L8`%NmuD0E^?A&hm#VSV^eJ zy%0oUpvW>rN)+Q1Fn0~0tJ&Sy_e?L|ZNqouuI5_o5{n1?on8A46Qml^l!`=GWdPnl zIV&9Urz4u^@P$3yohj>HoDl0KNp?&W6*o|2pbNO`BLn%$`2YLE~)cQk9!!Q4nM8J~GGjz~d zMmBU7;kJ8EQw0|`V|(0^dY;j`?@Jws!m2>tsASv80_*#=83)Bq^6)sL**OgKzkxj( z=_Lb~D**(!Q>2%6T8upxrY@G9&v0d6K?ydpd0nA&QfNe`dMcbNmZSVX7D{|zCWhft zhXXKi?@*ELz63f+l64@_Lu-Jp$xSr=P~1flnI7F#RZ>FdynW?@`_}NBfn3yFb9L~6 zMHHJ|c#LihSR*yj+$j7lbZ}S|<%deTXr*AS4A@P6ZA{mVzu+hjVsQa9!$tB7*PV@@ zn=G5+{SG!Yyc+ugHQjgiSk4Nnc-scScOlhj%vvDWZZ1JgJ=WlVlW#H^=b8OsThXV- z^0*1x#=rAP@vEXN=4;fh4n^YmIAiW9{6|1D%#fPI7S2F_t~NVF(u186)>BCk*Hf{Y zvT*r)Aa@}7(BInMV8wwiq;*J6bBhz|0 z`1&_5#;TmUD3vb09}H0NbV0cM?Dcf(2)jvWL_o`~@Gjl*dR{3um}m{=tJh-O2O$bbB%R{p!P!p?eRglB|+qh@S2t$e6ZldCkV|AuK zwm(d;kT)$Xdb4a2krbwHJCp5Tju;ylxczX}c{R}^_jc414ffE;r^CoX-A-)&gjs`U zzJQHzU2Va8qRF75dl5j-E+(9I6uD=ZUoL#`>}QrNI%jfF%xaDe9>9xYU^9OgN_00Q zbemx*6z^2XX6v>0a{hKDMWucsW!*7^dqd3aQl=%h9=U5ywj~!rdsXHTKwef|$ZiB7 zmOx)h&y^|?Vg)Dcj-!e3EDLB$8aW1D4;hg$$5jQvn-6$OgLN&a&n1MPj6alXk7wmI z*sc@4`uv2o)@^tmwKR)YzQ}$zI$OWYP=YlldE*>)`%a}l%@Z_)149Eitt+7Pv z{5iZ7i3YBc1JAa#zM*Yobl_}B*Znm*;x1nPA?(Iz0^QD61LJ@9)7$jG!ar@HI0KpC3PTMGGW&g zd#t@mY-zN{QrN>14O1rzgNyCg3FfqXa6obR?{tzV#(2|b6Flx}d97_glK8d7rb$RN=vuv`RYzx9!?b04L0?~gl|Ef5jzi0o zU$4q$zu1-F>e)&BC~hWGzaH*cb}B}a0sHcoo%&+IXXAxN5F`*mQ(4jYn*i=*;BgVb zYmi3PqjlB1cUOsL%|YAaE;X+vXkv&kWD0b}gpFcMJ4sORJt`1?>s3v4intBAyw$Iz z0{{NPE#b?LV8Q z#1q=TB^$rfFf;JTG@8W1g8%aYiW}fS*8g{dEcKysS&g>>P2|j>+mMEGK`Iw`tUN6c zJdTAH*s=8QGgrw21yHcbDi5=MAT^hSkz^wM0C=Hd6w9a&KMDVyy?-mSfFOcfo}z({ z^ij9}pUL`l#1uiV#@rm2F0xuihr1HsADo$~;hhrPsgcqyddWK!6iEhP#?|G?b0mVb zcTwN;$|ix50AP0?05==V=LUbXpG7B}gZx7Ai6Pe2pT8x}wo=|)o4its{?(4+GmD-% z&@Jub>JMDt3EY5PU3qTcK5R;0;NMT7s$e{oC+>XO^Xz5y1xrJePn%2_*=th4n)99U z{7;q@HmPs-1S+dolB%a;cy)A1dGTJupr4A|KECxPMGwzA2$ByY5*$z_@mNG61iGkf zL(GTsnwzRT!@n8U4X%xJ6}OnyGpgW#kjM3b{2X_U4L`?!1fa2Xl4W)eszY#T2tOpu z4Pn~>mW9vmXw#<&mru_IlPX$)B*R_+%1{Fs&u&vN-G#A@cb%=ej%}cZIyeHBisF7U z-ECBkzQc3r10*53wAB-S?ZGo`FiBxgI>*%}Vk4nUrCT!vW(I`yA9JMETonWhu*n zRk-1RB`}wizobD<+JW$|G$422@_FQ(CL?SNZM;lPA`oZfWbPMTudl=YZhp7$Fne8q z!NHM2-%dP)!t~9X+Hg)5_l6Wf=h?3Tvi=jUQV;xye%0e3YBL4D|IhtqfUeF-N`Z>6 znMAFfy`E(g?+PUX3PPcx7E)1XDjOaEPiT7bc;Mxmz(8s5CPeg|mucQj^PZ2oS8Te8 zktw2XK?BiIx8NS^dpSg=uS!qAHlej-QGhg>A#-mt}JC*=Tw`QHR zzi+VI9P<1Wm)GLB>xqqj*RO0)C~1Xbx%K38jrkqrczqro;GW0vfE;QfR3iUBHK2Jo zeii%9QHeBak<{IfsnSCgcnQzqT9Wpe|ZZI8=r$nz8S zcRd07u;C)NPp&UYN#Ni|H7PRH3{QS?u?TklRseo@`0tkfulBC|9m?qKM|RnY)X2V; zHCZCVAlZ_okPM?FW#7gy*0D!+vNKsC`@W4eg)CVzwy~CwJ!Ln%kD~hi0q=Evuj_sN zG?#hidCs}d{ha%AKA-zO$L3sI8Z16NE8{+p?x+b-1eKPGs(1U-+K#WSUt$#F6W|l; z&Y-;mDCP#+9;uw(5bTi{KJw;nc66!YtcnT+XTm?00L)|Y6Q=*9u4a<#LX(Wk1ub}7zEQS&#G=P^bdQ^4koc}WjaqQ?;)rSwi zo6R)6Cc9^BGT)ApaNTuTlw1R0Uc-bZT_!MYjSP(0Lp9eQu0LHIu1Q%QE5WRjX&Z_? zbBLv45+8Q9!ad8@OnZ(TT1spH7a-D7D<3ZN;L6<6_QuY?G~i`856DyRjuhJ?p=|DB zO5+R%@CI(9RpVAttbep3ICqYMr0l^r8iR49Yo*)VMEg2$e9g!}!q5Y4?MP-ByX&P; z&DTANPg|5}N)Us#-&f<1pX%oU5e%~uaQSHVbUS>f3Oo2{Bs~mBmFfCl#R+ve{;0>o z|DhfO5i$2J8j;2_m<_YV&42Ey!i7iMnmy(i-)1@VjJ(?+zFo2sbFub`A63Hp@ zYCwLiZ5IW9gToP>U0vl9LVUC-4vPaNV`G;VUEx*pRr`wszI(gY6<~5yB2R?TC+eUt ze{&&nJP4VkD}U&Zz7#}uvhE*vkS;5DqOfnG7hzSa!-MeetY(_v^*TW36e7&D!Pk!N z@8*T%D|J4RcyHv9dbpl+pyq*dE+}Xhm)TkxwNz&;d;uj-n?Zu1?LAd^H<|~4RIr@e z8tQHe)!$%fBGU=8d>nQGJUm)y`=!%PvQZ6{0f=M)%EN#49)quPZgV->v zmmEOr^*1<;jl!Z|hg!F<5e(L9sC(^~LTi3n->~2JXe4WS-X7^&q zoleH0(#zLcaSOn7J$0Zc(Txp>LK4u|?k4W-%iM6-j^AK)3SHHHh18Q8s;}?ctTByP zjwk1Y)(e%J0&df`$64xli99fwU2J5sEs;hbjOrj_=^+(HN4mEYvgK#5uZ$8Ia!1jGYboLi%iVTf%V4Dsn6LcDGU1ofzD z0KGpFGPf#ME4gh8?OJ$)t(Gb{^MO;8WrYje@EV`;LolBkfn$;lfm1p#(kTxCGylL@ zWSMwnw9#zga+>Qu6y&j!WoxEXn%N|U7UJsqEAF;#voHoiw|0S`{yo&mn(=3~mLl>c ze+H)ANM^TgWZ>*$VX3+UJTpht3|!j41C}!Nk#}q-?{C zJkW0bz*^xXx*-$*B-TF~R@YfCb&?HX57*=w$?Tfxh{@>d60W+HRxI|m4pyVR37K*; zz71&x(_4OyDUsbqvjqrfrpTCGvyES3`VQiO0}2?f*d`k0RZlsXlmFl~x&t^_6tCs? z-d3OrDHI{vx06fiy>O3!g;+m58p)l1f4!1MLd>!4^(zqy9svC`4CI2{5F4G0U4~0> zTXtY*dh~)CnET#KJK!K3&8}Bm({PtCh%_azRa=T>cq!)}Oy)u$E(TX}+s#*X?iasV z)rk3IR4};!%F5;!bZR#;8d?i7u^~44mMUuM19$j>gNqxghGH30J>OF^3tW^eN+lSY zl<77yU^Q8-oud%TnKpWolJpuQMM<(FMd{=7#vVUwsqPc!yxC-3drLO#PsY$t*YCyWU^8^;b?8)~Ojzj64|{zs}v?(ky%i-@8vYH|~Dg z{7zcvlDg>umhuZA^i;E)=60mB^4bDpHY!INn(Nm#vLY`{#5WGdegPlWYT)z^v}K#6 zM^`x0>OKHj;x^&npD!rGV!Ha!#*Fnv-==-VYd;rFwI%8@L3^`jh9A3*#=c3z^cQ7{ zt~fh#<}?@WP!j&g6C#lqSd@2VwF5H_c*hO7;SeMELwpbo(e~|hPpiq@)J1sTB&p&6 zAT_D_e8J}C!iZ5~ReqaAH5Y3)=0E#f;07D}EKSIH_TOpn>ezTOe=gyZLbRR)kil@i zv1V+JHNTVd=mlaS7k?umb^ZjcqF++*nXJ8Q>DgtcXoFS(r#T{VKrYHM$d{R4Fx~aM z51WsK9%pGWy4Ofm_?dv^$i<;y8c;d1fPd8DX5Pbm{sC)T)uy80+eLcJRFedwSa0l) zwG{4w`+~w14vJ@D1|Yf!{1l86!Ar7H+N+D8Qtaijkf)VjT)gmBk+nQOeV}#Y{P0}u zdLR>4T95edX!+jMcC4nzP0nQV?fYW4Az&r?VR9d`2l}DzS*EO%$K2!nnw2HQhgQ85J7CC>EJlB;nTXglMgp@Q z^V8+BHF`_0v0`HG+)S2tATnkN=LoTlrp<{wT1P};JLLbYBPuk}#l`I~c|B8!A^3(q z4MFIJzV9`a5yn7Xq9QC@c30?>l2F=xRmF2Y_bz2bg2bTMELimE*4onw54ArS(TPGi z#Z?yQw2V7ys|!Ju7F{H&p-%X_O%O$iM{#c)5c%z09i@ar*Te6K_8s_Un%3LBVsp9Z zD&scmyTe;95>Lurs_--+SuD;s3TfhZR{*oNhxF_aA7piG=G4d?DNr_F9IY?ga#?v( zqeq)z)raG!O*icwznUJd6fJPY2%8xxZZRg}urwMMVB!~T%zr=Km>iVYH?r}Euq$@m(xa`iJ))&iU5y<2V)1F;zO&UoM40*i|Sh zpU%IHt$thRzz?@qlz;n1eW)KkmCk)&P0Z?(Y0O%il8!tl^Npv^{fNY?BTIC-)|bh4 zoAwk%W#9KJw#1AB7zhEOMo!KIO-gO~*120GizMNe&?2G~!&>a?L4Mn)P7@{;Iu&yQ zBv+>6vbOWdJ8_CY7+tbZ8!Y45&nE|@J+bu45Z8|8XF_~HI~Md;hS7N=%@^Do73Kp$ zegH8187faq7!Z14rtL!`;%boz*1$$oY202O^Ac!UPt z`7UgrT?xv=3LnJ9Ig$DquQ>Zs5MDw=Ut|H2|1_Uo?;U5CkG{TQ@5tn2XS9l~#`;C5 zC5A%~TJchD!NSpDMao4s2R5g_F7xk#$5Ha<*gwXDKlB))z2CAS0#A!iN#=X-A~!lc z3y@OtJQ0@wF8=Gx6AU|j;J!$x=V|p>Win^B(^;~3WCHT(C0VC;qz+;=84FL_Dvdl9 zlnWd;mkd5}1t*?a?;bkurYNW>M%*07nC9_b>^6@INS1>sg>~9k2nlmkdg@7To!4~B z$&RC4u{m|B@X`YSGL}>?lHk^YAXg}8RsJQIfH)9)w)b&}MUe5QqAD7;9QN#z?b;Uk zh*UR+urx9Iwc-QM$XVH-STXfR8~&Wabv29fh0TaWCt;O^kAg^c6{rPuDqtMiBj zLMmw0>i&M|m;Af+V=+Zf$Y!ctiG-?2V0D?UXT@gac5_Q=S2dHw`%*m~g*ECd&E2&# zn#`Cx6YtS^hV@?j-dp!ZN{!?T;2s5+$wWhlB4-ExT^D?C>JYH79C#emx45YH0w@46 zn-d8#uBsc7_SB$FXm=<6`ZapqZQ*jd6x*ZjJS}Y&SDOpZ16=%~$1)bL@5}bkWPLWv5{7Y;(Z55x<0go9BT6Gc`OliKWq zm2pfEvh~|Pw|zH+x3udSg}8&z_*qHA@ex*>;ySns4e))=2Y^D*infe1{hh~+P9%!w ze3dvgBKVs`vOiAC{>)Z(;m_?(Xul4?yZJ2Np8eCUlPiKaA#Ci_XF^V7+LQl7(D^{V zb}5g*-<8P8;`=(lxqNOa{_lH81hcu+e{=*LbyO(%GU4k%jq;Y_#_*kBQJDrp@D0A={v2*Dd4OaW1b_v6I`Yb(y%T$EV zxSDzc#Bv3K|Bh!wBH@S>5uHxilSgrC%|zIXzuryBo3H?BOd|!(Sh%d>8pj>0I9+TZ#g(91zWS;Y-r^L!5uS`jC6^?Ng zmXm{<%Q#ylePw9W$f8gXxIZ=NM>+WC z{AtheH2glVC$6rpE*{5IDgq?NjN7e2>d%pETs*&%Jwzb^l-i;#j_e+K&F!C;q=|BW zb^vNQx=H}R2K~2;t(zh6So@f9fdz-JZgz;U%KlDPv{hf^yTfV zj)P7&5Sz*4_>9A1ps;KdI3B|4sJu&eLr3dkS*dXSP(Xk{PBtpahnPRE@oSp$VNjI} z5^M}`YYsp&xY{D2|FB^y!QP;$PJQ9Z$;mAyx9#tw)+4wOtM46>OW|Gz^?;G-B^MhE+^^TyJuAccPP-#2$|*R z$4Q`JPUZ08)OzGF>NKg~wQHRLRICfd+2yh#lhR<14<=ux8Nt;X?_K1-1*Pci9Fi?<(^^u&-{6 zIg-uYRTCE+y_A%cD$Tc84kt#gnOlF^4HYVia*(Xe)lRT3KXmiNrwzU`bptZwBh1WW zFZC#fPr#lS0X!OHff7IvcM7P`86xM6KXcx36D0|<0Elkp;;)^0+d~u_T`50; zy=0$tWVTBqMeX~E4w2JLAxi@v?jF?m5Rn>b2JEg-U)de{Bt9H_@M8tGt&Fec{RwR* z+kjkm*bWkWP(57aINNp6?I3}XV-fZ-dIem&(=F$K3*29$pelLVi$=jD>>X5hIy+$q zOOyMGt_pR55G)X@avs=j{;gxu>Ld;xhZ4HN7h-gtj@CVPK_MB4F3ZWuxh$~uq{*$a ziuXObPE55vzeBSALpUaXbu%_H;hP&@4zGN~on#l_vH(y_moA!}X4XPSxN~d}t(&Y}N)&C_021HR{x6NHu z@;#a*Myd77cX{{bin`svrTO(3PQ|>CD&Huiw3-fNu^Or4E)CO@nVkg|lHL#~E5{FuVf*BkD^+BcMAIXB@xCtG|!I+48{de& z$763ib{I!$)g-)9$+|xylw?2dG{iZ5g-Pr&`f|>C%PRi*N4@WiG|aN^=x_Iqkt6Rt zV)5TLuJeVI9@Vp1X|MmSXM>s8$t82p#p#)@Bwj9VkPi9+^3>AqrzIQz_YU6{q(z zb<=BU+n(P66%!matD@D!hD=uyqyhGL=L~Av(p3^~-V>LHOzLMtd$Jz`^$T2zkuv`T zmue!^m$oqd>ev7ou0;BX9KKUp+O+a}xqRU_JI`6x-`r%K5SrL0ZSEyOa`;sM^cwc4 zk`HA5r$fJH$XI$UTOuk`EgHZ1m=~#JeXi>u0;03$0A(lsoaNuuJ)0IcVIU22NgJz2 z|9N4k4)>n=*OaF!Ex~3s3QCeSkDP*8R%r3EMRZ3yUx+BbjVY<;cr zKjwmB@708e$cG6AD`{>{htWDJ5Y7t&l=6-oBFXIjR|3*Avw${~=Sxuh-OnTleEW{evj7E#eBEj^pF@774O%<3-qli=`{u=xD zrvVZ`va5LYQtH+}DEybRMR5W1*?-GJ(80vPA$cx=OVr0uw*&B?)hC4HUv++*l=VY{ zA@=r>fTjiQcx`oJ8rAc?tB>(Ecl0|kD($e2=XwJl50)>);nL`--fuC?d{3C zh;S@D+xR(}TUGYhP_%SMV(LMAklh;5uScUS7>@7w8R1^a^?~%bK^%ns{z4HE@6#^Q z(Wd_P46RVY-IV;v?hk7F0h%wlc8>i8auYq=J@51i-Bel{&&%%f7D!7t8l_x$NMp#x z4THv{=vV+zszga?Rhr&ru`5QxC2_!r!^`1hB$wjrC)XMClOj%=ILh(PL7{&)D-3w^06W zNMW(};rOB_>E7gf)d@zFAD3?w^YIg6&R<6yB%7H%dGLg%*{@%;?)R;xI>_b`g`Qgr zh_zT1qO*m5S6q)vlY!3W6b}Pshl@PMPR&F}o|{mY-xF(7EN$=V1N=wqsxh9r;p}tB z+gL4b&0{a*(CM84X7E)*yU*c%5Sh!z@MtpotuRUCO-cI D@?})v literal 0 HcmV?d00001 diff --git a/assets/global_params.png b/assets/global_params.png new file mode 100644 index 0000000000000000000000000000000000000000..fa24f80306c4248e21bf7f8d2a3393cd23ed1e4c GIT binary patch literal 84073 zcmeFZ^;?wR_CG8kjijV>NDirVOLqv;og$5NmnfamrF2M04h$h((#_D_0}MIyopauC z;&Yz=;Q8Uct~>Wyd+oK?Uh&#@w1%2IE*1sWlP6Db6&2oSJ$ZtP|Ktg>7zP^RPFU0z z`X^72jO}G)G!$iIXf@oNZR{PbpFCkn^h^LLOCu3SNGKCXmr4~DbQNTB{KBEo#wNh} zl@jPUUSgkr?M*?z&ri&c^|L`1_mu$g8>t|vry(+Lrsv5Dlm`#Rt40d3(;=?SP`Bpa zI~x3)@1MHe7EjJ)|5USrHNJ#!WHKgnplI}sc=vb zcMzOt@hNHDXwK){td|720NN@8ZMdgIv~F)2?BYGRlEYmHNMi|l$8pU2GHz`9^}v{Q z;eBkvU%m}$q+kcS^4EO}@7dBM@8&$}We~G~-9hoPRwu<{bo1M%FA3XRT3LBcGflwM z9!{NuKa2N*4r`YKHNbCe5 zHM_#2FIepgkcY@KFVL`$YTF^XjeJr^5lu+ z6U8@DI^IY}Iq1Q}va>fd$|Oj#3EA#rLc6837Gt+#m-{PTUf>mY5-AX(rhbZJ8%L&* zNL#8fmgF9n^jxaY>IP(r`|5IR>}Z6e?GX92>*&%$Pfrit+S;~Sp;zPfoK@o?e?v+g_AtN!QRf4$;@IudO5yzAdT`Paiw%AGhEt_1qX|GRC( zLtXU$K@-tHNHd~ce9}aw|K{h<#t}q4|3&9NUi$xu{yC1c@pS(t7)cwVfmHVNt+5UBsm+Af2rIA#@2A2X)L;r5yS4@1kps& zyzt-ZLyL*rGQ(~9!%XTG{fqP>`8dW}+gZlZx8Ees9j~J87Rn<;FZWPR91R*BsKqO6 z^L$o0;k*04y}ZLX|3yG3Q}Xk53h{ucq%n@$;Q5P5nWUl+{q?^Tkx`L0o`R5Zj#bEi z>j%X_{o4rjx08+#$+Jk57_|D1Oa`elqW#gCn?%gtd|LUEQUWG?CCz%U z0fnQpzXZ&i^90Qo=lUtO5n~h{&EyS+#8^V4BM+^aD4FP%s;yx2ZR^r^>%*O~;I;N= z+|;P)G`?@bJ!~!Clxo(u0v}kRq}7K`2_lu1U!O?I=6#Yr}sFBg9B{IJ_#Kk!H5WMVfJm7tHwts`(#TTHvN!_4R zguKMfmO#RQTuW$UoL*ailhfvbLoH41z4!8BS3*43!FUU=ILo3urZORBE0#As)37&Z5I;otu+GzFlu5O{}4^#P# zNwV{e1@-)ohz3W;*gwr@NF`(NOXPa$bHLb1=Vyv&x58hx=da7yT?${=N@ghK798k3omV6gYJSr34?t7!=F9BSkFXS^V;9Nvj^82 zmtR^Br>1_dARVfEg}vPcZ;_aJ9rys*2eo~0CmqFcuF!63^yj`b)(5(ER2qTakZ@#i z!UML2S{|znjTd{_cOs_5&$l+6R`$5$2f*-KJ@chP^jv3HQpme#gf|7ns1RTR?e_#>eKKTIHs(ZhXw@ z=Y-3BfoHo9H~vEVeD+h-Kb5_fs04-^^plr;tIxyBOqHV4%4%wr?SlT8oV0+D-#W1l z&9KBi(>~7pzZVCWT%t9qkr8=da=mulww;uvRAq9H8GgzSO}fwh(%i-k+kQd5O@^_4 z=vW-xE|Cs16|LKAd8DQRCMO-xXC(@|ldkuy!gI`hp>yECg`zEIawtMJly8rFP zV61@Yu3i-A{_=3UF`(Tw*)YmY!ea~HezDS3Fz*Xj$Gg_6J$lgXN{x|$^OhO<@7?{7+a?VLu$+GC-_xVx`SSXD6a(TlQ?nMWX*7&NK{~kK#G;{qy zWQ&nejLoS?50-Pk72$(Yw3#zFAm)cwf!P6X8`p_;a9`AFhEH-^c>h%RoQ zOGpry?2?+D*93_(92%&FX&pf z>BZ4g+nz7uAP$LuGjXpFvc9t~@-joaE6|noYZ33We3eXY{c@cu!fRS@DTuEDF5v*M z1@c{QE|AxD0;h|TE^RpoEK}#bPS|u?ZFuS_gD{-qFF6ffyB~($20g56WC&TN8A^N@ z@~~BDE)0@@-d`l84?jnd9LX`rwN0G>lBZA!^W)h$!7brT>lMwpIcB(DIwuWz#HnBySpn z{jumvTA-et@t%hLdZ*9!mn)&K8tlb;y7OjpJ?Dc@-84!_H^CBZQBvi3@aA%$f%$g< z-%2&|PeAAx`bicYO4U+>$M8d$0Vn@A^NB(x^UV(Rv2&}>xavD2RL4=mEWHsQy25}@oobue?i4_hqEP}S z0mD_0pq;S{uArOayk*<|YpThP07H=CCU%Sm5A`Rc&fRjU$DXCu>peDG?GqQdkO(pG zY2s*BW4MHPu*L6svjIOEv}=4XDqRlK_1PI`d@WcQrd<33#|_^MFIrJP4vjGV17s%M z6pyQ(`?AOUM1UGA-N;w5{KIw6ABt%~G*ET!LWq4zDT253mr{0OSUp2p9Rz08D2?U`jng6gL#WIB_8!`0!Pk=2*RfCG-CJ zL^YNFozyd$XFjUml<;p793nxNwO~U}c$h?DmfASk58oMz zh4%MNh7xyE<2`LgK~tXV)}Y^&vUV(?@SGzkX;FW;SIgCPR`)Q6PPLZeSJz;59^4l~ z0S7T>*MMU`h0!6;p1d^GMI|t`UhDWL^E@Tp^d(X&gu59yFwK<7wWhRR>WF1KN?o}? z7@3WZ+c2IGI^u!xpxaMI?kh86c3sVIgo)j?33s97yRq%PNCmv3Zv%TA;%Zg<3#AiH z(LbZSO61<`ECSuo2>pNu<6t+RgRW%~s-yE-V zdnC+gH*4j$Uu1dP8Ut%StwuM3kx4MC-D{;)>9g?ds^Y5ed!Ob&+aSFxYvj6By0UVC zuxJj`wgJ|Ik&RA`*h943bBlokw|fXvK0`nAyN!DqypPN4>f{75{HVtKe{}TEnC*}F zE+kuuRv8x~yAUc(Cdx>BG|pZ>0mOfJo%g!kEG+Tnq~&z%jE&}l1aRXr)N8lKxJ1{R ztmmiR%AV-+GXu#_wvwkerquU_&FP}N)ZI|wXF*p(uS(pXlS6LP5lYH!7$(nay)j$0 zlwBKggZxrGHz}1}cTkxt9QZsGkpEKc1+7Ran&(dD`9GUUi>{|3E zDDN3HS|wzd42PRzt-uB1*;Inrps-k(qV-+5MgIvmzEv?k{|}{^q3+*dmG~CI+>GY^ zan9ixc#}jpxR)0}k5|lH^9i{Ljncgr>t!@k%6^SYpZtC}9sBmf9U*m(f4D}phZsiw zu5?!}*iYL=TZ`U}{T0E*NEV`AY!6qt-aTW^e5jnX{(=-j*s4#$uR-4zYS$fuOZMz0 z?zOT^Lx5w>-zk@bc0|r2p&fW^_g6C;1OsX)(nmtDja1&j;{tCC+hQlju9P?jZpkqf z=+{RRbj93ACV_vd{(oo$5`&8r;lOLWaO|Y&oQA)m5=cz24{f$B>9pFPzTTJ*^1^;d z!nPFg_-%#x%m0}G>csd&j$cXmukiUV=Kg6+T`|8R(E;fHiW~pq8RFtAQrE%%%_;pe zaf6>8Opd=nUx)hN-~34lLG;=GC(*6wtnUBA(peGVK#oZImeL>-Ym3`s>Gl97x`fX` zO17|%l3KQ4D$!?4Yi?vxVkACVBaQSWwb{SzAz~V>8PNx~(|Y9+^SwX?WbzdXhJB_W zh|44X`SKsBu>@6AM+}+QA&ou%$x?_F=GSxD*E~3wXFQ0BNqdhmPn8}}d93iCouV~I z#xeXshrPe)CWpFhg;-)GukpE2q_15Xi~g&@Zy5NHPDXL0P2#-7@7%~4vWO&EhV10U ze@*)gg-#$IVVHe3;igsDB%$4EU54n{S%Cd(pwLZf>ERS3@QL0#{9f#{!X?%})`EW@ z27nIon9JGkfJvaWc2vI47n_TRdajmjbhR4j(!1M-An7HeQ0#|za(uuUmRX7O-+94r z%n2{msmt_0ah0YZ&FkZpbl=T=)4V_)9i!`HdF&5AzpRHRRIi8(*L`Q-ij-=~JuS(azI|JxX>8&Hk!3Y#_#}FL4;nZt7$hH_ z6qyIXAnL4D&Ch=3?7h%K zjELulm)1k6FioY5_`aS|l<&GL6dAq`k+Fx0D`-EbM(h1f0h29$Q65uU-(q(Se2C2T zE|cg_D2UlSH0cL}rulKRrqf%mWIW3)wy`D~!`j37Afc$Y%t_7>O55L!OJ<`PW3&3I zZH?ZfAspaOk{$lHR!3VCW8N+szc5>12ZrCxk4YvQeQfPDDq4Uy(xqK0D0koI^1k@R z-=+5Z(EeU^Y7cQ zC7&j}g;;gfX>{OH%qHI|gWN#Fsa#n$dEV@40DXX%Ff@7W*jMo}Q`P#uk!lRG@e*&D zX!WO2OWnod5?5=TW?z|UY_ci*g!E!-D#*P*;Pz_Q*dWqf10b$4uDY>&y|U#!{`G*zS6eg`n%F_a4uQdw-k7SpEjy@cub zAkv4!WAmGkwJdXrAlMR3_`@1nMSLV^7l`ZMib#|w8sz9U^5kp4YIanwvxtVmeszo> zf=v_h+uMF=pZytGv}XsWLrOXMFQ#@?Ja&&?+jP+gEK5R++kC71Us6WuV zSAyanww>ddJb$%%9yVXj;Q}>vtBi7{I|i9M_6kGC5|WPs)5P5|T=4Gihj-^|wf$^G z&p@{T!^RlD>U00iT;J_j(yjX19_z8feE>N%_HtNpHMLj`m3K|)oVLnblGnrHxJO$NkhXwL5V&e4h_M}-V-Bu z7mtp-0d6l3Z(c^Hye9vFtW|Oe>;_aZyn7%KJC%+ zGl`TMe9mq+C3!fDdiS!?oT{dH z4ohz)Mqw_Os(uth=cliA**hkk5~VcV^ahpJzSdh0<0S#1{zO7GTGmQRql+^{iWCB) z5rJYwV$K`V^{Bg?NEB|vvm(G1SCfJ)a4wFo_Hru>FsX$1D#DFpv zka@FTP3b4+MSqaVP5QFu>9I6QCiOAU2ci_4IkPnfdPJnq2hE(5 z!+SPzV>OLmVy68%($h?#<#v|xn;w|D`b7%WV37$cODNo)zSGGzTyy@JxhiRnC8RqUx8@4`Gr~(Kyo-*5D6xgRdOCa%HX|smdHI z<>@gJ&Kqx|>DW(!@>(m4IC&>eml-O<9v_m;8IW^OQ_$D5cuk4zyqJ>E@9%geZX_fU z%V)~Nhk4A_QpY|?=?mdigPGEaQ^D|kfk{JhoA1EeyQPaMKk5`%0NPM+hu*N%Rr$W3 zHBHO$m|=Gmm5XQ4t;GY&+fJJbUIFdHRdnHoV&IbaM!kBt8o$-#rK^G7smHMA!ED-k zQPdESsowfzqh_ne{B#&(>b-azv>nWDlo>tMH}sD@XB`~U*(2d;fAYA$68QGUZ{X!= zL^}aX|2Oi@o+U@RdyI#RISFOYAEpm?6FMNS);;>S^I-S!SCL-Ft3M56pH)puzLmlv z#`u}+bK(lRIASkcMWhE{Ce0u?R4=~NR>eIXjL3#D9t$_PLYEq=mcCo`VohpVHvIti zmb7bad@tbej+?30)_z7MyanuQhI$8H9DQa6if|L+&#gVh{kn(35IGB~90zNRphR+b zhau&nyDuk1-7gD+#0?OJxs`*Dw(?!061$WWs~%mIEKoOx;^J69WIKxHSq@P)9+fbM zGw?Ew<7*pB&-Qk4w=6)P{Cd5F*WreA@Fb0MzbHpr4dC1;8SR$P9f5uLQ$k{yz9>7l ziWhl7nsyPwZ1~I(eb5!v4tSfd%YP9nCQHuhSOTD0al+C6$|o{CMVvAINnJb@F8T<3 z{n|8UnHUxuB{_GJvwW&ZRICuM_PDheA~4*CYue^{c_4p%QlK(P;=5NOk^GUfb)bT) zeSLD${Cw7rZafN685t0hz+`m-A1^a`ExYA;XvT-E8!*@0wC0Rm;N_xGiA0G~M@Iw$ ztK087h&E&Oz-iZwISxn{RO*?V2x-+#-P27g-dYrzzTYN5B70%ga|a#4UG2lvc&plMIxz&cuCEn?9k8nNi9wZh8dWaMTQ6 zI}QSpl6Z4Ocl!(8dxm8%_X z0U!>r7{rHGY1$-Z@Y;!MMff_g1a|HW5}FZ!zOP67@LlH6>0%vSX65z4i@xw3&dk;u z-khcDl+~5H;j1twOw%Aj=A_6v{4C2ZpzN4sjP0HkuF5b3a+g)D`j!OV0}{c=-it*C z$^fN8?(^{ap3dGvhmQroL?*kCGar*rV0{u4^WowTZb$PxWyVd#j*HDgBP7bN#9G~L zA}W_15i$EW{`U*-_RK{phwfTmQEnx4XdDWZvxkAn2v_I0E@`Ic$Z2iVa*p+c*ge27 zv7Ea0!?qPCF|i=egE_aQXGapWl(PYx^)@585;((p951Vunp{%j19SnVoWtprIJ@LV z&BrH{D;?kLJAb2$dJPtCQ-qgx5bLO8UE~S7Ms{$FEn5Y`eQ^&Iii)pR)GxYZC-X>W z_nsLqDy2H4W?SE79fOB%xL&qFY@>1m+6ECBXX}l5RwsC$DrK`d-Yxc4`mv$cK=-Y( z)l%BAli^n0<>cg^-9mAO&g7~C^;R~VR}A{rH_$||=JgPIlR}?zV7qzpX1(RT0O4Ua z{qPqikm`NfSi+kOufY2pj!e+>cCRadOJCa8bi#PYA)s{WM`Ig_{7j*cY3wUy5&%S2 z5t5^j)3RMnX|;OvRQgmpwLEqCi(`MIbHh2W`o};>&TzdgxOynPQ&Z$y$ZV4`fb-y; z%ZIs2qpY1~@A2uG!`lhJ9wXx`%tcSfDj7$m!Kl+`kcwr2Rdgn$NMuLXW1oq*NB_d5 z*>64!SB|BX+w5$99boqN4Rk+UR+Dt@(Sy#koR(@+Q#eigs#aTl+S*KTT*<{OVt)Tn z(Gi8#Tn4TIp(hLM$^GiuTp3clKHVM*&EnH*OcK?eJm3?Z=II*D|B3Iz;*OL&DiZtBN1kpy_=C`hfwneW$Og<>OKjzUyHlK~28tc!d?HQ6K?cVnVQd|6#Gw4a9d* z5*XGxmXmA#A_cENC7VI(>M|gP5WYn;-t0*x&jR@xU5iTw4>(EH=}U_&VVw*&lnL+G z_W6tJE_ylaHkxamub2+`o>GN?pqyg2rSTOmSBYFMuPrba%=xpFGjHF%>c^DJrv}lB2L9K#0lcL zd22u2T+o^YF7`a~qPQ>mvNhAV96J|27Tp%t!q)@ zT;*G$5|x|BQ1H8+_N*Aw_+e}(-Ikd{q%N0Wi$ImsojL%Y^uaiOc$yvAl|p~HI=~-;B7DumU)KrOk>L$fLfKX zitp*PSxC2*eha6rC92$Ki*lfm(2scRNtsnqI~HY;w<7|jlcFDzI*hK&6-Q$2s&^4( zhsoA-Q-_%{Y5>KqhIWwio$q_^9Md(REBUX^UqAa5!V-d}NMd!?ALNQlyW=j@1G@1E zRJl7klX$L%Noo@Gr*>OS3QmS5XZ$|!Gcn|f%)O8*m=ORCe~FaV5d8Q%CmG_rTuC>$ zU19HT6&06Z{fVGYka5AU!mvh73uI1ZixCEg1)wj#+7Mjztg~LSsTOz18_5;Yi-M)Z zlY{9d?g)J1>~BeuE^jU>_asX5DwN%)SzehA_tn<4UMtxWU)&ZonDYE+;$w3O6w#rR zQ^f0gtr#9M&b8PC$|c*pIIRD$)1Y7Hzt83cf2I*u5rp>gA6AgzV-M5;gRaw{@M?TT z-Jz1f1Q(4OM|paIz2@W)M_SaD`&rI!FUfwu@p64Ugf1vofGpSBxii}BlJfU3`I?28 z^V-U3xyb>&Ppu0sw^E8;O;gC6$@&GO^U$yQ@cY|C+^m@&F7LQsPqp!UxX|+j{yfni z5b0sGCHllqtzqCB;le3KsUFTCAJo2?XNQ)I;NH{(^ zJ>Wd;9C+n*E&2*MJU1~r*;)PJtOu`5Epb==+mU7sB^R)tAosmVp0K~*bj$h_H)A(; zD_W8X#p$|qThy4KxM=;k+G73gNNqrFZ3jq5-o0PDp2gt{qBRE(3-0Z@4qJDNb_rXJs6i3t>{hOCs%JnNJ@7o20zE86@icjpHS{;la9HIxSfs%Ff-v--3$D?``@(L&!`MJ~> zSSD8FHCj)@Oebo}%$<>Z?FKCMe|g$U%s;xEC1g*pngt#6h!?mNnH6}@ zzlbW}WtptB6=Hd(*Fs$s8L%E|R2%D84q~%e?VcX%U1`pLAPXZidA+)c^)%_wEXf`74IlN*q{SOSN_A9U1wt-f_FJ43+L=46{6$1ASHp(k^Ooj|1?D+=HVi|$I0U% zd}a&w7zzT1qPvGs@M>76AF*gn&+}DB^3OhNbLV4H{!e7n$j5?fk&3P&00kxQNi;iS z)A3BuF{;q{5}d zd#9euQpLa4P7rlgi6(ipWWoxgF92Z|#|J@iAs(P?avf&)p zaxz~9GLm08Nd5B0UpZ2Ji!ZjoasUwquLuNbka3^gt-arhKYMXK&L~Q&)o>n~uFJ7* zz=w*7#|)zBllrwg$SdRV@YOPqYmQEkj0tUBHNmr0`w!)7c{T%wx}knU9AgoGM}} zv93EE0%NCdeg%g6`K&HKt<&&BB@lf9Ozf`uEg|OT1^q&PS4`?UHJ3F-hBtsi>RE%2 zz%R&_Kd|y`?hri}%}Dr2wOG9HU9n!ye%87VE0T@&3n^!+@2kv6iC+R{mHc`AfzUY* zyi{_ZHG+_&OCqYq-=%8qx5Jj|f?+tHMMHS%l|K`+(vP=aI(s$mYugIWxq)RK+Q1K* zwi|mV;#{DcObE-(Sy+4#$%lpgvKNRXde_;<1Lk^B+CU5xu}+qJz3Wp_wYhT7fo)MB zy+z0v38UTbPYMvv>>=!1Vm&FZmqc84{7 z91{sY7<1#MRp)!_+0Oc;2bwf|gV3bl`sTR2lt-A(?q1GL-dMWzVv*|D^5(W*1VJn` z0tuuhms@$ta3+Stc650Ug{K>wz%C#Q6x}(F zkGpg;2Lpu}8CViy4I8OG?_FIeHPpZnlcyv1UanjM&~7fZP#T#Z52jhivGjIcqw5x< zROP);S=UeGFuj%%iYKmqzx65xsOAlzOg#`np#12G(?sLS{y2|e4!6$dAk1=;7> z)1fGTYb!eR6n(@vQ_?*1`Nhj4(|lbU{(e-9qJ?I>Hed6sGi18G0VnnDVX78TN`CI; z1U#nLeNKER*a+)?DakMf#pz{J1s>O47e&nr5ZZf-KORYRPkl-SXe)i{jt`;oL6kx* ztcgxiTY+2Kh`MRF>Pp~xDqJv3LL^Z9?u*Vnq?+pX_P5LQdE>ERTO@llaB|c;Fx(b_ zNngdK1ZS`%e#O#fU22N*Ex^QAbVT|kD^7D&@ct#EuOrP?g55qU^BZr#<5hJ3G6#0_ zPO^%yQ4IkytBFv)v$vkFUWT0OXEphKhM)IdWPB=!1zj}s{%N{t00acs={}pFX_-Ml z`U+jSZCb1)mSnjDYyeZ@u~`!5%4dlmww@OhG(;7tPmvHL0dK&+U#s#m zO*e(z?fO=UL`JPNqlQRuiNWjmICw~4Q#?J}S52l9zPBcscfpCsx9w>5TQ|wJ4wcx) zH%jUc+$r($IEfNsu>Q-W#YPpzwWYh&V90gWY3nlB`#48OzMv}7YvNd7WiwLTU1C$U z=j?f8sNeYq>O8K)N8!AkOe6s;DWQFZy_u4>FXjz$?FC`d6c%4mr`BF=N7OBQkAA3T z_-S5r2cU`6eY~)RkGF#ZTGs?lF~!Nyzq)pzD_T+2dPvKK`8X|rv_q*~Y(z08RH|4} zF>NP#+bdBgEFHxx%}MQlgYwtz)Dy&0%)n;W8J@MMq;AL`CQ7eTlb<-6T%RoLQe30| zoY+H|t&M$$k`hAA8@kQ2`VoQo8hf6-s!EagOA(`$`d)M#Ch{#IS5-U(^y!&peo2h_ z?Pu$iW(!evdgI3_<->NYJuzGpJl{#4%Qf*9<0$zxT~)3&=TGj&!XEZTPPZcb#Ykf1 z`*}oCur<}5FD8^+IQCr$Z{pXOdI9GZL@>4DF>c+!orVNIV)T3@IlQ3XJrQ(Iac=Wh zy$fP4Z;Ei9&$JQ}$IzqBF`OXT-}qBu5L2iOU$8#PAE9&Q2FY3zZICT)Y)Y?<60&-$ zavz09+5P^W>}HepJ6M{I+2#%RSG(=b_SAveq?V?$Wl2G3zAbi-X=o_6=#^VvO3iXf z>a!5@W^@^sP-@YmWR=IWezQ_c&sXFfICK_i8Ef2FI2qGi#Pj*Tm#B^n+>IeynB+2> zJQzEq?=$kVrMTEmOhsihevx+vTWtyI8^cQ>XQtdBX<-v}40~c^TmM>_#cg_-R+u|MpoN9AFwr2UYSxyE&9|w7&>F4#6=Ec56=}u=< zWoq!U;rx2AC^@Z;n!cL1Om&cx``S7(OFW63+V02rOv~)T$yD(WM{>}^=?^-MAqf=h zF`XgmlrTNR>5`O=-$Rs?r;A)b4feB&VO-3)@|6&8bzC$F_%^&5I^4bO-Z1LI=!ja_ zB*6aNri!9F>iQKD2Le9%tv*_mRD8>@22^xh_#j-hSp{&R_ssqJLjB&_iuMHdXLad$U>(}?w@#ZZOuIEw=)5bU|gI%O3byk$wn=ZxU~LCKPU@c>k6)X z*P*r8bg+YyMdnzvy-6td&?Ok0j)X+qj+Alw`$D-)YZJ)}Bp%l;MK{xjeJP(tiLX)Z zQgpSMfyqWnOg!H~!SW^J`Oc_GDcAdn0dQ=0*HozeBX7u&T{B-}`Z_+2V!j?P%a(G2 zurDi*#o9@H7P}H37W!w#kUNbHL4epG|2u}%M6+Dm`iJBFQh4*+Mo7p-VrBk-Ma+7n zb*aoPRskLP#89@00S@=2V`s*yjm=0WM%SwgK12L1!ay-!FW1c}X!%4$-6o03r8ncM z5D!O)>xR)%4Zq8x`CE->D#-?q&~Mf{No9Owkr-xC7Ts4LX3J}}OG&sC#ClM$P`fyJ z$hdpZtgZZgIq28|j(uMl|4_TQj;yrX&!|g*$Sp*Pxe_d;#ic|gNbON+$h(aHXT~lYdV%H^$Qto8K z1nqFv$&B7d9=TM*42t-PxCs&29%Zs!ylk@7aQYGV!tZX|9ZSss{*;$R{eu{Jg0iYV z7sQ+!0*TC07ig~c5>PC<%(foiSgO!YiQXZx$Xl337WU>M7DQ9zQ3+Y-~NWO zL-{|Gc432k1AeGwk(GZ*v^^F(E;Uq{fF2maJ*ja-r|@z1@?t3%f?%6qdN0qDEB?-` z*Ukqx$Ux<0^&m(ExXm8&@C*+O=Q~ymlotNzK;z)@p6VqS^thd;Cd8b>6$Oq8?mTBS zq3{jQSYL}SfgKRL#*n-~k4HvUojmkfH0MgyBGV_a5aXuoJ#+?b?L&$&fpFNibdu*LrxC?b% zOndL1yPr)dFCV zV(G*9eCdLF@`*Qe0$GXxc2SRM{{7{ukoi2V4YD@*$Aj6`?JidhWQKlVTP!9_v3Lw0 zSnVBcQH?`}?@~*_=aBtA|F&m>Xt0In%ad4!10ufHov+h0g7((G_4~G<5vZuAacMH7 zUk7{aPfSon*>8zc-hQX7kJSvOwul?V!R7NTjHMH4EuzLFGEB2xz?;b`6SG0%6H6zvsbD77WSXy;B=(%as(ja>_*mas|E zMhl>T=O9+o`%bTBlXQUC*qL=qps)Wq12y7BYb$|r%z zAMxTdzP~5Lk`?iSntjwm?ZzojR(Oi~+gN!scu*QEH(}r7-VmjbgeHDGT@mtM;ikSZ zD;zlr4#C=Vu&PmNcyL^~m0c@)nM)v!L4`+FZ=pDaR$%MuB7(JBC9u4$o62b@cYVA> z`cwW0fwM#Navy#pR#L*L+kU^$19ICZ(}w3_|YJqGTj@FcUyFIs0>>M~h1BPE%m2XYVCz)mXTK*D7 zGhs{&)e8B-hb#~(h-Qg-dnqbH?Q=#7vn@Y;1NrpUhSyKqxa*eN3RxgO0a`*Z(H>?;wyMC4w}hLbuPOwBdTZ>2AyvF>kb-`8H78?>0wwhwr0Nm=rx zDZjP4%RXz%H~0~jm^sC+4Qx0ko74`zpsJ9{U9Ek5eeLzR4ozfGMX_dV8Pa*|bf@V1 zStyV{!fee`ua$an+@ldQJQu05;2br5+VHfsiyW0qIp8&6O`zP)XiC7I)n%boYUePt zBWIKc;j5W_mY4H|@@><9_VIkx7stXA^=}Qu5%A7zx&$uM6Fvz? zR%7IABPG&tp%#K3YY_XrLb_9q)y&WNWkFmxiMFgq%UXpJggV2^U`G)+H)X*T%sutJJ5xC2u#r$9ln7WY39< zykFQ&(}-ny_lTPW2KoW(?mb>e=+?xk_ju9wp-ir8hSgE-myb1CF_}EZjBvG=)zeSe zTnYe_G=Eu8y_`7gfw2J3C$)>MynBbo)3fx|xtU$X$mv7#t>((~rdZSW$?+5@%Yy3w znZMZ8>2V6TDO&(0K4Uxo@L)BT`3iwV(L(ysiNwp0zy1Z#%gWHNem$SI`Zs=AA@9it zXaNnbSKg&IGw2oBRPa7|7!slLoHqs;*_q0~eq`9z_`?x&Koc#zUV>`43&RQzsZV`# zcJ#$H^R76O?=iW>n=`Q(nYNkK`cO32?)Q=ANX&m>X#;VPNAQV1$d%F*aR}Xhov_gw z8r=R_xBUW{PDXkFLi?^fKZLG%HEo7+hL$V@lG-^Z{J7+VYw$G}0k2?MU$xA`bGrQX z$?UUMG9>DW<-T${Ync=ydn;|`d)=y3&kBJzR9sn8V-%JoS7HQQS=qWD+2jAfvi@RT zLpI(SYh7Topt)UMB;=yqYw>)oYrv3DkGWM!h9N|mnmYEk6n>EX<97m**MtMf0=+=| zNvSw!*STeegv#q2nMG!XgiQ=9TbyvdME{NZ{Wtu62zCwvUJtmWFx&kXeEZ*;r$s`xkyLn+ z#Pnad@;}DhrGSVVr8~j|<>=wBJ)r;gZ6^k!Bm#1bDOtz(mx}yH2LK+3wwr&y56%Jo zPu5To+XCp26w>~+t^D79Q_vZqhG?7A7xxF`f9`8Yc!Cfuc0Y4ww^lRJAOuoV$V%5B6(T7{W|R6ZO!CNCvHnv3Bq0q00a9o7Hs zxdcgw<11R7GF`>$cAtEY)0NwPVN(&OV97^0&+Sagnxy3xcl6nv<=!~sCB+3dYu@(Y zEyE6r7XDijuD}eBy-D4nG%ki?;YPLkm6~P*vURdhl@HE0>UEN`49G)Z7u!yepF&C$ zr+&DKl`Z9e%)Dys#$c4r-(!;f<_>+PRr*^tnsHns6M;5SZd0p04a2eaB@wZnh0vP9 z{7-WJA9Zj27ss-Ffd&bZ;E*ub;1b*&g1fuB2DjjDf#B{g!QCB#TW}v7f;)qTH#z6r zb93MO{sZsFp6*XgSM93Zwbx#ImGh`B#}I_CnJF4(t^UW9$#{W<6hGNd!Zb<4s79c9 z81j6!VSogDG4DpzvR_PCE|BaXAQXfSWk#_F7%9!6?u4YlC6ttE>v2uATC8SEDf*>N zd|RoWw{`zI5ngQC;^iE9H}uLoVGR>-7Z2Vv+MNwoe}PdQ7E&GWIcH5d<37k zu0#Q4)79s2RSO_=Ys*_ceKqHkrKu0iT$^6u$;~pho!i_uv@NWZSd_{m3O~1mlw^+- z`~zMylXqF9CQvI+yHAU(+!~jUmhVGN&P%CMkAxSN?R5Ww`8ZIr$n=MS>?hEM;^)Z6 zT^?Q)N+l=ox;u`JS=O0C7*Lwti&xaX34sCFrkzHNo&!3Y#u%C1ogI&+OIz=1t@i1Q zbwrAK4HP=mRQI|3(Dz!k;f8N3R!mK?V3sb)X-pR?&rEMd8XRfEp@^B!u1;JxdxLj( zO5(Y;>MwVO^TEo3Y)>Zl+YI6vRqsf=p^@a5Z`N#6d8~XM5N*o-XZo8+zfH?3gK)OE zR{0*BW9_=`<@oWKD3E=yY8wf(xsn~&ZP(g$)#&t3Jm|%(9?n!^zhbS*?T_mbHJ_`M zXbst}VjcAX%~v%Q40|ljrJjypNfgEV}m4U=nt^(yS@` zL4l}?=XO?-WtEA$#LB_a)c$GFL9^~t=2!~%D7LiY?g%;E+;fLg;v1!A3B!dL-J%hS z{m514$A;5%uOp7~vi(+tBznD5tQ__eZ3!HggUMuuD*Fki#6FIdE|-IgryQBL`*g7s z<9H)^_tI-Vw%Z3Eo5vTXSkJniYqSILe67aoET%{*@o5^6s5+-$)o79`XSb7+;tCTE zHc92Uve>+%j732J{?BF8yzWo+Dhu?$$x-Q~7?8Sia@B0}lt2}vR^D@WbdsWg8qNXn z;r8qUgqul}jq%-T>`h_DL zciS8OftQI{sJ-qYBb%h+_zMueMvE+j{irfPLlWPzK2sbN1@rYf$Z2C)*U?hZ75SU8 z)$GrW*ZmwFN2fFQXC$I;+aBR5Izf~7pE@%*9eQhWDp4QLm9tm9XLz~{PK`7hO+=e* z#pm-)M*P8oVR6K$(JQB&ij5_)!H;l;8kpc5hV?yJZPUHkB_=-ptu=;DP+x^&{KF}? zGIo(%1~G)@=`?uYZHnr8-1+P|ED4b_pu3nTfe`JqDz_a~_>_O15MEqZ-1~rm$?E6w z?GFSpqt7!P>gG6){ohqWR4+GL&WB@SYxPI7WwLhGeHHY*Cr+JdpDzr?_*fv++)CRT zeh}X_7}qG>xG` zuyfgGx`H%ZsLLa#bSH2ei0e}CvhV%3^k3ZPGBz`vg$ZQ}^Sh&U0p>p!@Yq|d zz4>_3AA6H0JO5nI^yowH*KayJZ@Ek{8CiSZPwK~ophtE=EnH&`EKMKrSUu=)WHq(n z;sf$=tupvWJgjU4@_sPrHOqL$zL#Jk81&ff<`3wN-pB&E3dozANb ze*9Kg4}il6EZ1oqsSqQ!S#OEN|LCJrB%5}C8*{?b;D9{(RW6!iz~*kN7hC#DP1f`# zoUPUXWYkiiq1a+%VApba$R8&kxR?878lmta(r5!xPlHOO0O19Fz#NoI?C=Cj!t-Sj zqDDhUST7Ys+OzRm&kb;vS1PGdJ(XR%C46Ou&`@Z~zuhx89L;KVE{Of&;1LRW?W@yy zb;Q5vM5Q3$adSi=j!Kv)xm`5!%lomgf_fm!(EekRVXz{MAlG6ApBvSqy3U-oR=q97 z(SofCxpOS8DE;S=2-ga^AoHwt_f@t~n1C8eDk^#%{pmOT z3lJI=gh4)E7LWJI4<2dk_KauaP=`}6?k%kFccP0#k2yve!<+Di=~A03GAFKhd{WE> ztGO(6d*!kg zAAPO{l53-c&>BVv#ulR(6A70tlvC?^es=6jqaqw&n|GMBx5<}mf9zlIJnd&ITNr)f zdOO#hB+rOz9}Qfr63Y@D)tQ&RPQR?Ck9F!L60>7l*oKgEREhaBioS%+o^8lcfj-~{ z7RYt{V7pj3JcvmV4n^VG7OZuTgI{Ry1I-BbT=nuv&tW1-?qngJPS&#Y9z35c8C~x# zy}=7Ix9KYLkVNk|+y92{nsv!gC3V+rRb?L`i5Y23@>pY6z-lu!gaeCBr-AEpd$oEq zS>d+dEE8D+h+|Sx!e&Hp(7&QL(m;1f-q)6@#PUvZc7S6T**IESTE&jtH|qMI361sz zBsIAcHzTl;$M=lXuLwLBiu?O3imeHdcCnP(GPh3OC5|%(_a%VI{AxIn14?1 zTp`e>f|_Ij}^16vil zU`Wx`DCf!eHM7Gp*0@v03(gguD4}L<2qqTSZ4X`$vsjy6LGU{rG>@{dIlrdi1jLz4 z!@wJ!uC-lUkm*R0qgb$sn-WR{j-X(Th~P%L)dV9}aN>Ohjd$V+tVT!% ztErx8Yy;R>D{e)P2o z^O*%KM0j?ocbzIJB8SC>1OU}ih1|;4R{Wcr57_ux1YIzk+M*y$Z%I` zccpn?a6xiIKTyR&IwaVAJ47oxEOS^cFJ-_OJBu||+D+WC&riR+20d>FdBoe5&a<|- zHjd0>Z+_)eih{lZ-Z82fmq0tk7` zCY|~uflZ0sW)(y4rh%GrD8vPCt?8vs@e2H=jDe88UIYs)6wSmJe0?gatYQ$1CK3^y zj75N=C>3e}8=N&Q0(*wA8hg-RocwMVnE{RP6{8mLh;$+{>k2|{=M0`J9$XB-YAa7+ z`A*iyvZw&QXR`ziArs)-51TJ*18~(gsxaTU#TW~o@QF)(faHegMD=2C?{k0L9c?a< zU&#{m_w7X{gB;rKH!+5HHfM}tE(T!cnMB~C-|N(0yph;zedJ9(V#KUOgo5cXL@@oJ zi8!AhkwALsCKD@3pkH@5c8)Zivr4!iP&Vd;ZYdzK<8v{HnB-c9%V=;AX~Hji&NJnd zSIoW4C#ly7qYtxf8*ZBT5c=Js{@=@XxuQ#Zs)4TIvjpY$W*gW7~#P}!_ z>b#s>!Xh1oF0ifH6yOOXL^db_{yN6mVeTt7S(AY)?A9p``l^G<8_~}3&J?j$q8>#w zEAW~9zeU{LEh0pN05rgg*X6aw4Uquu^hJ=~FNvOQKRt6Y_3%9c@@$KvfRT5P#LaQy zQYM3UULh9!poxK-E2P0khVfjn_VZhrT`GJWJzK%z*1T zh9*T9|2!Bn0XYLA18+$NBUYEtyYKL;-K?hkS- zuv>Ir;BR7>^wu1B54!xqI8nTQYz?0AO?gLFEK&dP^9W}kSES1G-0KUapOPFZN1xNr z*vtY4fR|42P%bpUjfA`wXj*_D>;6D2Id!T#{czIr{Nxhl+9i^NosZ%oXl20IrZO6q zHRJzEQ(2Zv))c&1`MZVc_T6nAOgrt-v&s-yp{;Djn{IskWRjwHK) zVD*N@6^(Q~0z7#;aucBB5f~C;FSZ|VAJ}jnZ~IUiOYv-e++;emMHr&s!q~?`Rta?t z34-%JjP!K;qK+}QS5-)81cKn>my&9;UG4>GiD(XC48Ug$hI9SA!BM0vY1Ro*mz!1q z`+--AMUKl~ktq}E`idj*kO+xU8B{U8>GRvD`Lc**8{}j} zV;=$sFxo%Gb)3#R6eXG<^xQE}l;l$PJ6Pr3z`{WHSzjKy{o3Fg?7)(bql3TCoc4_% z1#eYpk`t(HMdDbuwW}aM%k89`;|!|29SZy_tm60dt}A&&7mlj3~-ild&64e9V&(r^%qY zkNNu&=XzOqZUHAVDXe~59)mjc5P+&j^x>wm=SLnW{i^*?=gFe+7(-Lc3An)0u6 zo?PZ~T$`B(F%xuejX8xNwJ?QUQL3gRL;Y`s!bwKBI_yL!s&pFSoZgvnEs|m%0gk0{ zzL3eMQsE7bAmzr&+Y7H=9uN#fCIgKEu;F0pb6=@fC~8uF3y^)tQr+sON2@=LThR~0g}!Dx6|sm#fV_ z@t$ywCrs0fNFi4E32tB+Mw`6RuWTvAlOTTF_W12f8H#TtP>3@Syvn&tB#gcqw0#8oM2Jz#G-0|XRdQGrHDiE zeq$vy2Y~Y2z<>fVpuHdS>=IXI-xKD!b*gVrqWJVg)xB)rj7`8@$k3!zi~&}|y39CS z@rz}*QjsB{H??l7tFs`qWGE#xRL8Vc!Q)e&w{ZcH0kI&w1;12R7Yj=sFccs<1U^J6~1er|!AJ9?@w`WV@D^H!qIr))NBFQ*K$TdPlPLmBD zEJkOMqsI9{14bDMBBY0_Ba;}8n6!aiFTC+9XJHBrhJ6Z6y$xGb%P%~~* zeq&`*_kqUeq+s(_cHEiRa4(Zqi!Q`07fv)|BCWJKP4T5K$Jr4Et3#P_JZl5)%=fWL zjz1PDg}l_H)GkaOS`8G&hn%`A@Xq2*G{yJg6T7x+ByU?E7vX9Wnj>cUCO$4Xem6>r zePqrzjZ%f{;;)*|Z0Yt{3`RB4|MS|IRXS6REO^3V zWTcPem&cPsTybYp`NMchEM+LBLD?t9{{@&K+L1Ak9jnQ%q!#E8!X`tOB*IcW^3F_FpIf!2RTX}XutAQ-v1@Z>GqKf8^Xd)QhQxL&R^|Cswb zI`Mx;>3^hkiQ*toGD+CtZ^-)JKlrcQ3=uK}ku4B}5PScW82R_YUjl*#eR3DYtBGn9&nN5G{3|(y<J9GjdK`CEqKqL#>|Cl*ZnOo_-{8$B@wy9-=R*R< zQ3yDdc;jn#$A8}Q20q~W%HrY>RXpvN}WbHbfe5%4qHLmzKXeD3uIc9 z9G6U!Ta9g3YK5bKe7K-cuge{-8Q#auP~lp~rL^<8wLAX7)8!tPi1>{w$Pg2#;g%?Nk|jljO4B)MoNHPXLX{ zP|fXeol>qyyc-+xZ2Xa-+#D{NcMG^<%3Z`jeLVzf9#K)JGpRU!ZzrpU*xGWRm_MI@&thH~!qD}eu+bDC1s}Nw< z{~12hA&^%(iGFjMLA!kf2kp~rC3VFLTQVj@v>1wyy-P7=e?p52Qr~2PO-vZowf`71 z6;KP8s5iYck|@3G@`;@4V8`Wd2z@YU*Fj(Av()i!VR)OdG2?|;a7AfEF%`!|hg?V6 z<;5fEntQzYO`2+f{Z#L!aX_oeA>_N za{Y0{R!L;Er(w9z8kenNKLS=iDvJt$L`4Y)h1IRFL6Fj{ohFr zua4T!djD|{6D-hI<_vK+R$ZT^z_PALj&F{mY%o_frZv=sNYQpI>KUC3-pZ}542z#d#%z}!!xah@I!?G%tMOqK$)}#M z??P0g-|baS7r4VL6?HtWm*v7B0@o|P+|A_4Vw&@&Fu6pPYqm_-h#s8sK&;ye&`9^EZu zq6wY8N-_{AbePMuwLLlOhjA&_P4%NrANydwE#8aYF++N=0m#SeT*38u+YEW9b}M;g z=YnPR^T?M$e|b#z43UYm#TsMVrfW{avF=x_Pkpgb4w(;ZbK3>wEY`58;=pyEztoT> zOrbS#VRDrhK7S&o&m_HPwbqe8WdD|FyVMFkCCO}amEef9k;xL^y;Ut$jkB@?QmuKq zeop1ER?_!Wo9rgfU87g^SpeX~7vgU-43G}P$(W~e{HQ{@eSgxYkVmKbbddPsooeg4Yrr6Tuo zb7OOJ6Qev@b^gYRgPS~klQ0&eqCDe;?1o=Jp*v6sA`r4)($l)yqI{jH%hJr%j$cMg z)+`bF$^cfg-avEFt=l6colyi@SP01GAtH6Un%h}DLyAwrA^w~>c3(JXne7ougghr5 zu!#n#O2^8n5g}=A{&bf4d;%eKHajWNg zG^a@4ag0&Ch|xzxRdY-Fk~*{ZY0^hpv&+ob9-AY>U*nCSut+JaKOvIrMj&lQ)AxRb z^Yp9z$up1Ug$mGyTH;((TGELU9_{(;IrGl}n&-Usn-Ta;Q=OAJ6=EBndixsWUOZ?2 zT@{-I$>IQ3NuYZDt~}vR4}-@T&#q=X143qs-T{%<}3I3k%sR+K``rb zim6(Szx2$zTf}~g_U+?F3|*o;o=3bj*kOOnWP^n$FdTRQ%;eHDNs;9>o#vDN$s&;n z`PJ+-m*(e0J$ltzBReagjYVJF523Sh!@mYU02oOdy==Pf4F*qux~*T>tie#F3$J0@iv-JY^s#hHicIy-EfJTYn`lE2R|^|8XW?hp!aB1Ag*=hKWGYkG4eqSNHvYQz zm9UE;4RJ@+&q}D!_Y%61zqIQu&~32r6Yrg{E&cLY_+Ul)Ac=|K1R8xcCFh7uNLr%A zYDo&Y-icz1(HND-+46{XhZfV){VADSO<^?X`gmnrR4j}0vsCY3KL)7Ln8xKT?e^tj zt9yF+G!P?1C<4ed@cP*SPwwSlrmJ{oG+jN!mzTO(>vu&=qWNI~P8@^29$#|Or%i?< z9oU))Ox-jmA!-$BhRn8xWYl%M_w+nRIMChFpPy2OH!pqJj4R*%5V8Np@bPUV9y#4$ z^X9x&&`DWRsK+nwPL`sH>2K4yOi66BBcNJYC-;|==+!$p5A0?i5sf`lN!YDaWpk^X z{Fwa#I@h%vR_Q44?f5Fqe+Tmztx_4UsX{!Pe!}L0#q|A$ ze%5RVn*52hq@Eoh*P4ZSOTK{>0nwqQf&i^3@Hr$lC#`I|m<0LFFV5S6M{;8%4V^|Q*f#yYlKyMyt_e?_~3jxl`0jLl(5lmj<{FcNs(2e)3cjFP#YbP^$g~{Mw z^cMOBP{i{-zZR7##}fP(!0{f-au-6Hil?JicF%jVD^H(FREt~RGB?|UrmJiG1R`oC zf(aBeaIB(7KX!mXiwX@MdN zV4VxBJH12Hx5g~ZLjqups2E4{G3723p0!Fv@XUpD&Jz9)lE~FjSIdcj6cMg zx02kseb6mRkW+LHvtB)K=;XX=<#l)*Ec6{n(2R?t5ml(}up*k_i}l?}k&n`aWajC$ z^?#vc3cqP}9lshZ*x&oH*{7t646SF6Nv>ZRpG-YR-8M9&u@+{r+F(w}0q3&3Q-v=) zuUmrjjHPj{V!+z#R_(Y%ogMjD)ZX#n;xrIy8E-c7o~^N51269UFVXDP&JZ?n_A&PCyM1prH2noX--w&S&x1ko>>GRC-1nN%Y8OEs;Iw z&MB`bM(R|(s4m>Pm==!pyov(QdFj;`lLQkZG#?aGmw8|*s0C24+s!RDaA-AH6=mX# zuSLM)ErfD2dg)~_xaTO;5f1oSU{uj6Q%*RaK!}|4rP{g(QbSxA8tC~lW{5z{;S9pT z$bqoN4?t{!*7_-S^Vdx>%ep+SSEhgO{}>QW(&2^T_mJn(yrew+KO$?#s9ROB1L6I_ zR=qJ0WtFlXucDS~i0DG>%2|h`E?gPcLeD3*Qttj#pf91X;d5)GoTexaiU&2)qlHQ{ z?^xv6wItP=3?Y{6G+!lk;W5vrMQrXu#Ubu4CC{rz{j~)g#JpYcB_^NQ`GbVdatM(f zJHcTt1V=98+-w;}GSS>uJfz2gxp^*a?`%ELC`r-|yn@bcqSh6s{Jh-~e&|6VW z>c;432=O7WJD>m@@5gk1-VV|P+((HczmQ}Q?yr^UyOw+t!?G*56U2M(7A+#>$!7MO z0+MLHZE5#r745>JyNyH(rl4k?v>hb`1qbJ|B1A~8!$Tg)(KNf<#i%&SyI-T*#SG-sLbJrETC-EdEqf5YBtOsD|#x zBvgzHf}UbZD&UG@-ok4T&c&_q%a2)q)eIq8N2%fmp3{-=i&v)cK+k2|0^<}Bg)i_M zldpH5w!va>96a*DihTp$IP@?}RNGqdru`|Qh1s6k7S1+`m3KIV5uUDNe0bJpC@13# zyr7ZLvVFNHUI%{v(Q`koDLjVA%hK*Dz=+yT#7N|3_ho~<3tuw{`A7+m1n>Y69#pyE zt~iPscj#0$#kYfL$HqujKA;#Mx{oy^qdY`n@tFNFfypZk;AU|+W*wwngI*N-9jSM7 z$q=m|At325)>=wyXolicT9gadNUP0PAV-3YKd$a7n&5DRCeUk-bgVXJ0~dz4qu$}n z_$7aH4J94-7{`Qx?pUosLb!+;wSQyBOC*owfaTMUl!ti0Mb8JGX}~5FB>-NRlfpWZ zL~;PDqp+z%ULm2rhRClo3n}Dl2yro9|FGZOm9m@o&M?Sn3>nUH;R^~>XqoA=suxHjR{buncJJm8^)hTd5`x!BUvkbT#0|yVT$znU3SdX2Hbx9O0ix}OWF564dy+f>s3*kB3b8fF zM!H7^{65@+8?8gnTbcUxRs_Rgwc}Y#>)z*ZyANr9anvSUq0POHYVTy63U|Uk)kK-~ z|9Uyje4T+mnAj}6{Emf%MfyEUd~lLU>W2-TjZ1m@7BcMSoq*&W`r+63_T6&F0P@jV zGHnAU$dh&bG|%hmZES}&sMir@YH!aDiZfHQ4g;!6Js&zeZo)NTtE$O*7=CT=F7!X^ zQA|`|CDq>LUcLOhyXr1ION8(S7Ne(Df%%D}sCPE^Oc&D4>>e}L38{`gRGM)D*G3?_ zQAhAT{D;-(#?iWJ8)vgrA6wC{z8TtNeF!(i0Zef3+Tj=H!#EMCmXb}zj80zu@05(1ZE1^Aiu)HkzICJ9+^y!TR6eSJ8Oix=msv!Wl*M_&M0W5I+)tu&h5e zOkx`M`Q-X0_+cnOO9>$E5cx}VWrwK{r@9Bzd|Y*sXm%dv9dME>-)}ZaOpdrMxggT) z$i1h6B9B=?+)07*e-#YNYNED+c*eCCq-mNUoWNfdw35I0*zv-5L^`};gZLfn1>~%+ zLN3~u&n};&O*ReHUcSZ+cF0Q;VLMx+`O|3X?fpK7n*8kfE6rSg-m(ve#&S3Zq^q>6+8 z^c~Wv9N51uUZ)B~;h?cg2=HQcFky;m>?SOI204w3Y094{58m-kfBJTih&Un^V} zeZ2+l#+V`thoCZ6EH_sBV}wQls92tmMQZpkPG&tHY?~6kT0MPp=p(=y=+h-1(jCxJA4I;-sTClFqLOixmC}Z~&;NTz~K&k`6 z+lGFTFx@U;a(W1zKl0aQ(;z_?fyk1Z%?kIO+~vrJ!TT?SaWuZZz)zDjv5LS4RG}@y z37fI4HtNDwd0A|ID`zVe{FGz)e+*Fk?*UR+-1B18=Y^W?r}{BzYbAjc5D7F*rBC57 z&;2gSNdo1Wwvq`)J>oNIOXvNA-U!Yk99DcMQNWjK<``$ZG1wNbE62S*9wplEgPnYn zHuX)?3Brda>m}w~gXpQt-aH`@zyO;L8F z8711$xkoGCTjNObZw8==mBFmnjFU@ogrJ!nqbD&L#B6>9VaUx7&!-*-`Q`5SFbP{T zmThB*I9%eLEnVrhN>nu?b%G}z=i`lb77hIq+#pqWDeNoNyn&fi=hVY74-m=?=}P6B z?@j`XyS*&}tr|!X3{9{>-t|iX(*$-{~{uQ+Pb=@oD-$pZmqI#jcGGdog(07;htsI`fuQ?caZCN|wc>hBU2@2f$ zxrVWZ#xW(?DzKCK_e8$L^`EX?`4Ndn7%dvVu$+6N`g?uE<+thg0(hr2@;-9beO>g=rysl|M!=Jebvhb1ew>VVsu&_b(89P{I_TADouwQv;q6B$uF1pj{;g7m^4 z3qt`$C~gx2P%Sq8>9aU2G{Na|%?jF&vy|^}U&xM=?CoC!>wLzGBqXn!|IY>wE7^%O^^mp@!klO{YWoD&+ zJEs3_gUA;h()O=Id7H@pvycD({cHd0gq_mVzvl7Z!$l+s8~$EaT1hVLKV9{o#cXyU zCA|q=EyjRQth2kVYW^J^0Y>QidqG+Nlx*QY<3{9)_JbU$C7-bV{l8EB$LzoD<;eEi z4u_{xBOm_9eE(bw5x_!Dvfz0Tw2BqYv3=$)f>4#*;|4c2VPsxkYMfS#S<(L*^9_+dJ`>Swxo)4= zWu1KbWCYpwcENoixsDOTkk>Q$QKtX5NG!E6$`^M#`c%o7F*rOqE_ znk|Ku+eI9-4e}|FWjaD!l-1qTCTi}A+M&_koG;K+*c%$EjwDbilJw$)5!^c7f(4DZ z4mE2K>OZaY@;=PoX$)U`{^!H@du;r@1c<}~s2XE52YT}uXDhWwsZ|y#^+w@;p4wq5 zL`fx#smPt#$!^i`Rn7BA(5$;(br20JrBO6p-YguIuEPAf9rP_o2sX=NfepuIupLRD zi)RV8{O~`+4o(!%s=1C^mo`P7o}QNJ66_eCA{~^~bRTN>*e&I9e^U82)JUL}4kgUI z))~sn*$v^YYntWYrC*s(rhmm|NKI=fO?QZnVBeqAWyGG)N5~wM+5YK(n%2>Fn~KL_ z8x(c5S0c}EX1msEipybT@qqHvveo&_;);u5U;qB66qU^l?ws|S2l&iTtI^iXJeo$e z(gF_k%#OWs-DoBj+vewNVPM6|fJ^$x#U5RHS|^NWs-AZn-l33tX6t80h@cUNr|TJq zYMCk%CJ@gG!X7t;Lpia6AlRRdoSv6moaC10#SeqqJjI8bJR(c?e)N!1Y^&{kCJcV_ zj(IHzrt&F6UEi$IZoS2LB7@s_)MslctVm*t#8HoJrqP?m6Euuj{5?mZx3NjR3s~gT z1aF;gcsI2hee52^wu?l-^kcQfd(d7lG|FZ?$M9Lkd{af3_Z2^eSTem{yGK%>+TeW>r*Yi;#?S}!qJcDdiF$J7VZznW|8NA{PMd19dc|;44;}Bh~?4a zaK~~c-<3nT$;U?0x0_l;Hl2fgrSZ7hxHCASO1sy4A&QUw(YmT;z=+A=-uJRadGdEgiy+UAoXx8JN5P21F<#NmjLHA}>i z7e^szID2>JPcVo|OrL*lWwA4{3{{g-~ zq4soWQCv#OyE!?gs3T&DTD45Vcb(Ok5NLTMnc1G9_(_-0;5$BuMq0g-pUzo>Deq~lXVOb&Tvpb5 z*XN%M8V=bT%N5F1NxAr^@uJ~aO!7l+lJfE#gHb@#h7zw}$Gup}!BM`=a4ye1oiR_b zf}=J(N$Fc zl;?0NNd2JS>qp`T?kU~1D&HClY*)9xpw}&`r)nn{ugvruUsgRrMy2c%N39uDbbRv_ zda|!XeYw%jCNV`inZNSqdu-7vo)Nh|k9o9ekA7}tMW7H9=efdHl@hI`2msG)wVcP? zWrNvONX7gv{_8jWJ&5iXema5^EA%5o)4u^#s#C*vvHe%cVU8X@_=mO)6`8HZJw0Nb z`?${_6D2&(XZiOg2`eeoIsz_#0GY#u?4)%LuR>@TBRbYNw1q@8^~`Shi{VBPKno%- zU_!6cZ1EuBPUSKLDP}o2+6)vQpBAOr*}VQH*y1gL^#Lg7YQ19m4cmISNvdIly~8-N z*Fg@68B(x=wJpL|#tP)MU;M4hJ5MMqXNVKz6PCyx>+v|c!LnT_4VKHj@Y|BmXZjlX z!fDZOEiC zGYl9v03j)mLvqaUc<5!z7X*wI2T*nmI4|EkpAP6WI|wfO!==~FbFv-gZZS)daTmih*M%Y4Q!ySH}q6#jbp2FyKAq>EL_tg(;@&O=h^k}R$ znil6yzQLxpZvM<<1j6bY;bKC&J4*-fZn*sH&J_Sh*e}G|2+R|n@&QuOPDLT9u-dbh z$*5GuX^>qDxJtL7N6!~k7M~8h>c}1mo+NI0vCrPpvRBGCb5YJUaoL2@!>IwaN zGPst<%lNgQ_Rh-JcT}e{;Q+@^{M8786b5Qaz2|dU4@x`StgL4eo9Ho!Ma)HOYP!7=FRZ zVEdceRQF%MYxNm<`0gW1a`R{5VaQloBhy*O@Ym0V?k3U{i?atK2+^5ig5{8|LY6-B z`ZB7N>(>nzSI zul@v1bUR#7h%lzwI+IHSUjEFBW~4UYEwpQPH8Ln6_TD{3OoeL*+4%;;U;~Xeeu$8Q@9}e;_b@)7y|V&FMIkhwlZHklQT;ne{b<@4W&yk`z~s|PmCEnx4S=a z)w%U_yB#*J)hia>L!Y z>Bc~;a?@lX4Y%U69S4w)d{wFyz!K}a@Vm+{l&uGWJlxkkkA4oo$e=x-U=y_S=x8$` z5l6wCIqcrHF2nOhJH>7fnn(s!_l)|bI`GQOSL)a$rajd11{l|`j8mIuy?a@A8!&2b zSr}nPb5G>CTue1Zvxg&#$|dqN&8F=a(1_cF6~CLn;U)m2wXp}k_s!bm&%e%RR8J&lMiArS&FFP!QUrnl#cuIgN68N{(Pd5$p$4DY zAqvpr8~yLfUCTx?fuXKkfSbbX3yGeUFQ)8nB+Uk8*KFXJaZRwYYsL3g^Si2i>&P$X zxez&@d6phXL3xM~vOEVxNAdLg(?r=cwptL`s`16IZiIklr+^4!yUtk9<5H+}C^`lD z(l$@DmquQII(CSJ4~$00RSsa-Fq$Ls5m};umrwi;-^3+JR0tPDff4wsrz0g)!wl11 z!J0x3QE#&+8D5u?3JwPcrzqx0*sXhCFndWjf&Fs1#wA&Hd*Y(Y%*3RU;|4gm?YE1D za3uF(fnhVM1ps}@s~1MNk!;Umpw8dgrj~KNP^CXvG;0PNW#!E|ZYzYDjZakvzdW^E zsl_@S^*o!_oxXNBSUZAj(!amHGoWh8zA-g?Y;_YhX$7b=3-@;~_GVX!L z*QOJY=NFn$U)rn(^cBB{`~HiKhn&-uUH)Njjw|5mEN)kZxabehkQeUDWSmnLcVn-s zx7$B>_S)AmdHl}`NLUH~qXlq&Z8^(2!xNP1L6+r!zk_>G2cGZiKuxBQ0IByYY`rrV zap*FS*YjPVO>3IHMGOg|%u4?z4QTJa?)W!ak10bmSrCiUncK_zc_lCklZ%<~YCdOc zU=sH!r#F!Qw7LAN*?SvhK$EeZ9wepCH2S`Hh4VGO_>>Xcggc;GChv2mmYU+{fwu`} zQKIo*vFVrKjpr9U%|E6R6zgK24Y)@n$$mduji5XV=O`a{ZlGYQE3`)cxjN3JCNa;; zW0(XKO>TZePep*bT~7Rwav)SfNTI2=mev2}_+B~BXa~IA&@Q9YZ|wi_}BEx@aZv*0k?WjPU0m@#d&}xw0S`} zG3X1%Wt1C)Bf0wR+r2l^d^mV=IHVnI6gV7VQtb-?O?n_^%?Xi}=Aw)y^!2)k9Krln zvs@}|H`E%DkfX7c!3<0lg=QvR4Dc&qiWgl*8DHRc10n=U(3=UW{^#6DKVWUaX0z$R zVMh6*0PhWtBh{4$m;pWzJ_}>w%B+l5)Uf0Y%OjxU{LnrQ;=D6`!8h9jDoH|{HG!MG zINZ&4PN-2P_QsNEAWc}a??jRtB2C8fF%I~XY>rK*Js@%#y8w;lUEm7%i-UP_WO0uY z@u}`FQ-YpPMJ0d0IWcvxN@fBZJxQVD(BItibue?ne;)CEA2b%&5X^@6(V3GUP18P$ zLfVW${~2KrOf^e2o!RPAZCil1V0v2V=O{M@4c%LT-sGHGT^Bp=J`_owA+5muSzUnm z1Kp99W>-3rAaU^9se#lJ5(N_a>l2`baT1*t4mC7~hS`Cfjn-xx+U0V~Z4#t25dxkH<3J`9jmdcH8aewNN*U9)ftLBGlNhEduw zLLDTL&=WYeLhWVkWih{<)gEI^%RtxGj*9Wtx^JslJ@pU`b`Zyf`gdgQ;1ZJXaFk;f zJ_hL6t$Nx&>bF=&(lJ}CC@XgHIaXkp`#oYqX)Sp}q0%AM!R9&##KH%X938iy_RdS@ zfstH0J;^0#p$U_o8A0qrY*Me(1MGVt`bA&^ULK87bjfoe120m@X&R{HZ@Hrabx3n% zw!PxZ+wUv7a|X{)vKX(Wn;&g5=4N*O05FEopBd`ySVF(s( zp#!g*Ol>Y29G>@5inrpT9fRT~eRVv{<kwb1GeCg86s;wIjQl)s1%U*PhPyc0qb z1cqOz5iOFU2I0-yHnPmdPB;wqwRk=zrW`1nNtxUUGt?aPnk7c!LdNj%5Z2NzGy%CMK!N4-KpJVI*ImDt- zCqW-c^c{`*oyOB^h>@Rgl+Bl%sWu|sA`x0^58XvmJC66q;~gRs|14@Wy(necM10^`!m6x9y9&$l^d-Pu0-lOz@*jHS@k1-sz(! zpV(T8f26qmi!4pXd(~8wWPLQ zrHF55mN!x=eLNJ4>GWKBvajx=1! zi>h6|+279wG>xsFih!SG(R;nN(x`%qicx`wS`fyY0LKPYsM4@J3oc2vD+h;B%Z6oP zm+^xiywPkU4%{lC>@t;1d2=F6bl=t-x-n>*)fNm6D1>c^o$dB}Vy$b8*wrp)NjwAI za@c-17r1I-n(_?!m~Ee(kE8c=5a#RWwk>rxbI$d5M|9jl#=|2#Lk*{N>gzQB#czo? z&?J^HLQFU+gUemT8r@d!;&nsziXj{6Ulam5Xsa(_#IbnfO$!W(W9I<|p}^1iLltnH zDvkPAYJY6H^@w83dC}f{mF9Oqrq>TYNplcrq~oRO*;e_>^+&y`-2cuFA)vMKB$Vkn zc|I#4e@%H04<@V^G*H$}+N=5#?2Q5V^S*;2uAw;8{MVfb)FME#4kod)UjO-&kcA4! z4G0XeNCV;m)tvqa0RBX*F^F^Ef<(R@aQ)lRpLgE?Ab2)bHOj)?A0YZ4d4(W9Tr`Z_ zqW%9f>p*1yLAw>YxpmUZ{(Sj=lA}^Y=+K`8B!bei^8c^9Iex#B5>Q9>HkE%YrT=t| z%3lQG8x%46Cy%KA^Dg8dS9t*11M;!yr};|7?p6v%r=$5QV@Odxkjvi(Gf|)oemMoA zZy_ohzt9;mR|F}9P7jlhVhIcdPg=>2vfR*Vm47o`ZC9Q7=Cb#2!jfY?%X;CHx^jvb z7>dgH3=a-#!cm+l&WjX$r2W)?E&T`ZjYS0Q-s|_Hz};B6P^u7mmToFttHX?Krd%}s zXPDzb6HMr>Y^l*y$q~$6e75&;jsSdONaP_N*5MP`)Upa^MfCwsb<=QjqG6-WQYuZ_ zTB*(3tDnl-PNKSTs$uoL!LlVvg)(6nSxsW#t;C*3Iu7bOuKCX5c9mIz`@Hx$Re_0g=U19RDA--ZCtzw|oCqKpH_B zNu{M3LK;L$QjwBwknV1zOQePxB}M5$y1Tn;=UQ0DHox|C~zfmeluQDsyj1RkIMxTC<=hF%0d0CA8QkKM~h-9K*qJK>eW@& zKu3{+uh`+c3z72227Bd+BAffSs;YZlRZW1iJm{?WV_ooJhh$_fLsa1)%Ume@m+`tc zuk|eNfk)BnG-a2WcUnSZ+8C&a$9(c|2ts}pny7sqL8Mp<0N^7yShsVmnA||$!#G=zq z%$lA(?tH{}?y>AUqLgta)vmMgkrM8gLfd2Bo|O=-szj8_at*&{?HX51gT!T+cndtY zrrCdR*usJP`%C=JHD?QIMf*!e%I<_KpAvR;WP^^zzHYVggmn0m9k&_hqRW0<{wp%% zGF>VcjYm>ugiIGYO`sR(eK87)@?K+{t(InkI*dH-aCkdrBC~N_EkBw=_%7TjM0g9` zn&I;2Cyum=e4v=9#>XLf<*KR?UpVNA^XtX8QUFJYy{-zKV2)O3C+Yn`q<)3=NomoG z7HIJ3aC=0~wOmX76e?M$PMTYr-U50(RXy2v8>SWuTZaX*CC}Ykmz({14i=h5*mY{# z7aP4muG!ak&|{7TKEc5c`pvF$1{**Ei-5<`VnyHI{w*LbnDi&J+5GJcS34|OSKH6q zx8#~Sm258BIGfHwZ6JSHy1w4t6H?>P)4w>6mQe>E@X0bz@1KS>+f3#K?M~G+fFHU) zx+wHpFj0DVrovvlAN_lP{B;lX2nPYl;eaO<%UUjm;xlb|5yY^Tpy6tV0kt`sl@;pQ z6PL~1;C{fTAG{?<(e(GNZnHVtmT5LjhlC;sOPT$G6)ove$ve{*HmLK!r4{R>dCp^z*0fb%dC81nx z(Lj3D?7M!42i3&o$e4C`;U%EpMhPWhq9qTxppcE0U*|2Xz1=T>K#8p88za4TlgSRr z??xQ_VM^XM@2=2<9<0yDHVYKtN-{QA)ichut{f^@46=b(U-)ie^>!{$@2(|FT4&R( zIwkP@~^nRrM5R7KVWAXjUC{x|u@$ zNv&TthWfJsvWEeaG`|CWy6@j7B*LFkRr+CR#VC`fmsgDU z#F!rAY6CUzZ^MuiU}sfZhc4^A4j6mB^Yc9HPsV2bYx5RkQKu}sRPoi}VjRJ!RdBPk7 zYV4P=H46QDrhi;Fwn;y_KC3bG;c{(!)8%Z^OJiH0;tfL}sz^JyyD_t)J>3mzKIdi? zw;n=ZNx*cqUY9rYB44W=MwYOIQeRky#ZehCKXd5&FaRv{k>cg^;;^q_b0;cG!o5(r zCq=)NK2nRmqDlUJ0IDA7xSubUG4(>XCcRrS{NtN-Z6jhmTIYkIUM5AY{-N=9I9td8 z67uv&!j}0pI7yNwz&&_}#6=kutoKu`s1Yn?+FV4hi06&JW6Ye*JqrZwpk_l!hxJL* zTVK7%re3;t{fZU(3*CX0$=(Wom!R2F;R%c$BTo-`UI~A>kFWTu-qojdIA26X7 zoH=J&e|2EB>}8sMnfOzup?(+0M4GZ;v5bBg24J3<4CS4sRDA21;zgs>7wr1AXPuJ2IxXyl50I8rQVI23 z8Y+VWrJweBF4Wpb3wc~M!VeB5Ts6AayKu-3J-x3FukF%dfqjzJ%-eB%w5Sv<5zYy#9pdnZ5hz z-Q~XfR-d6hvAQU|T4QaS16Ki!>ci{BAsSRlmODNh`y}; z5_Mdv_FIp`C<<5HH+Y}&oZpTGyU+io-;RMR=I$74SybLnIQ1xIkQ9N zSfrCuMlJfgf!E!hfsY@dlC+a|XyBV-_jtvRGHr91;hVv|wmELc5^Ydc(Xl~E9r%`< z$HV z=(PZ{5YL5}0rT#f^;nT2#(ly{e@-b7mrzNoucmd(lRDglP9budq}mKNg-7ty85xOs ziB8aM^vAfXV)8cYiC>SmaN;A`Oi|)FLr<>`&Q)ifc3M=ofaB_AG#{sWpIw?~wbAzz z>}W^oW4iE$J=s=Ta;*RPNoz_H(i*P#Mc5bwd^O-3}1X zP(nCoX@q}%B~h3TBpoc|6ZFJqSABD(?K?Moy*t(bWwFvNc??m&B3>Og`IyFUnmt6j zj`D7PaCg7Nn0C`Al)VD8u$ZSJ)TGI6;we-sg)S-e1{+?<6sI=_6iU3FmcooxLB(IU zXTw2c^V^oMjnE8FRtewZa%GrsM19InJYSVs;Nb5$&x1iz(&M%pSy|USOy8R+!(F2wS6S?KyiLx9IZf#U#Q@yws3KVBUI zr~UfYUWtM<%TO4Ew%@yTWx2X#q%Tj_k7v;6`ws(h-Zc(TQc1| z_2E-w#pjzHsWly8xok#N%h>gIISbT;OKYWMC4#iqH2CWPJPH+>qel2o>s|dm! zlB72l;#dfG-C}7KwG}6k?ciND&`*_e^cPbySQ{^rb>8G;f=;|%>%S1>si6v{@Uhs( zlH9_@U6k*@g2uBLyww02{nIgRUtX#ta9fl*-e2|qK$R>0lBA;otOkPmdDq2LQdja6 z24FZs?^y1hhVA<|`kYL4!Ns1h>6HwjItIsqTe884F^b-!cXI1rd;|ogElA@=jFRwcs%4_DSxzhcvz$>cLGvu`qnx5uKP#`PjiGQhu3SLO8X)ND1 zswWBKVsbm0rt$~+uiiO#qc+pyVc%uz-%Bg-G#l->tn*yRBneh@7b7RXn_{egaM?A) z#XK=RSP_nY-*<^~d9keC-ycuE{%HLN(UGD*md9W|7O#77Qv50J&$kWCozD%AKE2aG z9z8?-MpP&NAaB8BiooHaTg z9W3QT5x!tnj{(Jz-UHr|N;IuboXQF%Vjjg9-eI$+l~#aa!X!wyVZ1VgFGHn@vh616 zFB<>dh<@_#@WJVo3LncYhjH}N_Mfbv9p$bx-s_b&3IY0!tWmNhpn>O}N3x!zhf6IP zC8~3k`6U&PO*?Ok1x7NUv!XASaoO>2<_HQR@@o>@$XhZ9Lr{Vh)!wU05>2bEYeNY`3*MBZl~ zH@f|S$nZomEJhFnD4_><+m{csw0kT*2=(xUd~K-k8Od)>THPc2jTrMhMSZWGe(XT& zw+f-SoNl`pw-RgIK3QIR&GwPSA+@5mqU{V77jqKtv|f7-^(>sih1s#=X7rujQ4!AV zTBRxt=56adAl%AO_DJ3Fla741JDZt8&T(X&UN_l%`^~of1yov>&&+A)(PU7UxHLW# z?_RsVl(iiY%Wol(Ek4U1p*6wH!}$Y~@DO|@G$Sl%uU)!9-LpNCMJBy*JBp8M9&Y}u z?MD8N&XhU(tPhmlS^xLJ;ynI%$pzvuF!s`jBNl^)V-7q7M_NC7Lp$s&wMxbJKv)WfZ`7WEF$mrM!Ma2RfY9@apX`?xb_AY7qV?R z`s#3rOZGRfiJFzA|JgZh+OhvV#@5B|M8W5&SK!jRd;E9J_o&-8pc#pGbn%2nn&3G- zy-6n==Bt@OYZql;aCmNwyhTXX_u5rN>}gPINNpmNn0pbLD$;}Ow=LH?QG5F;%0n86 z;V2m&aYkw3&NxV6sz25)-66K~7_6&5r@+TSE5kzg2bP|b~0;qUtFm} z>~TTX(z&*-s+t8?e35o3_sa55@-=gu`E{$X+gi-!_{ueCnYV95A^Ee(1xd)0ZkDpv z5Kje_6Mw9ioyMjy?Soh&(TuvvnKyL-9zC zJdm1^lex!Z+`jDAz({PtS7#6M(;5vcJ{w=AB6MUdhRF{G-yY95_YAA7dtcsw@O4#D zg6~(A1D$VH0of_MLpzeFbw0-t9qnwHK@rQ*sb{t|W`haqRd{Xvq=TB^LY>BMtqM-f zVLQs2+4Mg)^WijtMwGI~E5UTU@>xDYn{Hp}96D9B>m7w?O|f}sKhKnyC!1Ktd`Ap& z=B{8ANlXt~HP#AE{nT0NrgJps&W)IntzAj@co+A;rpGcN-wveG(q(B`Vc2y9t_5eG zHc>mYlS)65c;(xT^TflNd63U^s(A+}k4T<;1AE}Ac#^CPABE<>0FqiO@qZ2*p_0XQ z(f{iK@SG8@~53##;kHrw2U&#q`FlS&fs0ncrTXsdm&7V+uG}Fi$d4?vv3>J zG$(MY-;ux5z{sXQoDIes68^Lbe-}UAZ&BcKhU=kyIYZEX)|MvYq)Kc$U)S<{rUluC z*fL>f*Qw0orzs!4KaCveZ$jqWZtHR_z#gZt!7o+0G8}yGe_LGs`m^N%L71%go`Y=c zbld0xD?^0^$4<6n;mErS$V9rkp~nr$XlN+n$12Aq$i zGS7hx#ybg<-j)eM)Rjh}a(Zk9%v)~zMRID*mm!Kt?Dlw%AG+_pY%`pJJOYNHto8QP zA_I@zZJYL`2B8fzB2$nuB-AInrbkSIa+1vJ`jDc1KhbYw15>J&HN#TPb4_YjBkH|D zN91p=fx;!9agEY+>z^xPx2x}Y53_Uxj;Npq2US6dise>iz@4u=;4^+NrjWJf|LLyK zJNDHT@m>!!g-9Gk5%hv}nOC2tvHhC@S4`Wo0_K@zWF=A39o-xM&bl*fQ}XzyG3cV( z9YoRz`-&;!UE|S($i~f$nrl`N-J__>t>SIwh!@!W*y)0SCNH_YdXH!Gn=aW~ zpHO~zMCMKWXfcl*6K(sA&?l&K(;wflIo}>sv~5Bq0*{P^8c?xKj5<^M6b6}x{VOyM z5sr}Pl@Jv*J|6a!&2l-tAGLjM$(7(DR}sMjn9t~|1~m6YrlVi%P$Re(!z#Lz5LyZY zIVO}Hk8-MJYzm68R%S?mW{>h~M8 zNv%obpEe?ub51-`_)TQq?DxS**2B@`pA!4bM`_6yd*7T}C6U09O?}6fppWIY!2+Ny z$2JesFH5v1RIK{dRn(M-huXT+b%kQwZfy8QsAvSZ)&6-5Gw$($LEtlb8m#d$DNl4e z^{V>!WcHzet-I#1H9h88oKYTm>VwTz&SvtC$8h?v+ZU%VcW4w)#9440b;_C==~`2q zQQN^|zo;7GeRd~M=#yo^>zBY|@N)B9?F+#qCO-Ocyj9g5I)&lb@JQeBkrrh~>_B5D z2lG9PV3LfQ=|&F>sq<49XyNdpxs+(Nl!}E}PA-exZ+KJ%>AX^;1+s>0mF`Kz`)!zv;(8;RJ*7htCuhk)*h`6M!<6fu63x%N` ztOK+l`Xx*-2^<2!uFdD@wys{~vVg-bAH$QHVEvIaI(ppF8M_>T?DUeuTx|zJ1*#W# ztd<53mScToys=}a-Q6Xv|>F`uG!FEf)aw+m_M7}r&Q)x@>KJ*ewSPOG3Yao3FN8s zX1|h0GK>c-SDr82Cp{sABaO4+&V=DYa`wtsL>E54_e)Ny!~NsJ)>{R(eAuU zb49U%CM->17XBzyDkvwY%W+PeH+gMy=|+STN|I`pVUmcEc#}tZ4SQpSL@O)O9(EF?Fh~& zxt@t`6{t-FUS(#v7Ut*U{oUL1+!%}^`@AI;)h^6o%lpC4`TUau9?GH>)|On>O+{zz z2AK&>Iq0#M=S(IM$T-gPx9k2~AJAnTx#F=Y*U_Ra!iRgE3(2>juTrRHv`=+VXYjbC z=%2^0DSgzfeI{=Wwj{#=CIbJ?;6P{#$493jMM@RO-ARvWH@aX=W^+Byf%A26jPbB> z9|}5JxyaU(g-XcZXKwG-c#< z!O|pIJOx>xfM!#0Oqrt>!+6W;S>(313b0OaEY=PxF6xP3_y5+d+}3)RDVLd%{QP{S zV`~73kO4VQa_g*(_WHtg4SDdD$?a&K-^y!$lsd=rsigUcQkNNM*}mv~ai-J}!-<_^ z=m^U`N1!xx)8>(tKxe87k4Mo<7o(F<`M~Imu;oq*J7J zkMeMougrcHp(W{$?`m%|!h8d3FTOk&L?KdocKz^vop-5+=b}tIvF==prL0(RLEyMb z7TsOmYT-w|26|-37XQn^tUvPN9?7sR6?gXx1qOFFNmkj1cuoTY#IdcnkLDJwyP=N* z?+CG2iSd_Q@&c^az4iu}Ve$TiRZhJj@I%WO#@b$J5<}*9b%GrNdhBm{lVteV#6F>| z!C<>Ri*7OAahi1@X1##BTcawvW)xYl=k}QIMQnteMwr+1`si#{L}R`|4yL4GO+En&$lR&!5$#D(2reUBlx)|09}3j~ z<+90Eef%Rjtx9tro`Rw#ShUCr!r>YSo0L7UDOKzlJO& z%Zz#Zct#(3YLH9p`i+aecXcCkqQM*Y{R3tB^>yhjguNPQX(f#Eh787j66oP?pK!J6 z@($3j`Uk@evO&3GmOcY<8iwQO3hP6l?CTxzhil5$`ZVl{7KG!p@%?_s0UQjR<(`zU zWu}p05NrHR8E?|U_>;V@5Y!pp7<^BOl{QHuDP$ss1BK^u4)a^0#@4KtG@My%phb_0 zIGyZHkAV*xrup%gVj`ajeo7FsrDW56y$eUIFUoKCypkY+k#VG_R8dEnOmP8eTBRo+ zAIDh$P^(qHH$lf^uM^1;c3RCzDgL#0^M@m3+M z^OszX`Qq=>KYv)fUu{DmDkzIU?LI~5o-VO*eBxH+{PrsIT zO^DRYx9gjsy;8=%zM;il&a%l}k4hrxojKwa&Aj_dfA)MmJLC1!^yHC#JQxVp|8~bN7T8wIHn1JcFb;TeC zBXeZXjW6=D3Aw0!$>^8Bhwu&YkITP*%gg?@pSDAReN#x&2M~>|QzN*ehiygQ31vT) zFJ9TMh{4!+HsslQP0mi3tS!1eBuF|SABL)>(g@!k9^fkT$er6vC1tFm=A1{S__$iC zIRD`j`7a~!B)v67S$Bv`{G732Fut{#TEwl~#;wL~D8{q{l@31(;(KRk2ESySsMK(o z$kh@Qk-ml>BYK-@fTYDiF#pK#=c}E$A=lk`g%p>r;Tp-1nQj2qNnpk*cVpzdKOp1* zbbCTG|HfFYa!*858hWPaoKcBuKjua+VOY|XI0mmp>}z{zKv)_G8n43q@29HEXoY`j zCy0I56-834TN)sub1a0m{OT2FqosH>NQ~f_GZCD!{gy{TqXvbs#S@CaN<$EhpZ!CH!3XB!|Kj;iLUr;6=-40HkUI1pmcfxK+~M{XhKnKffp$!~{?~C`<5p&;Ne@e}48C-)Z{`$B;AsV_W?n zu>+(te3v8wUOMQ=->&ihWM}?TI|MP`3O@V~Uiu#rr1l-_=i?ue9xVa}|K|te2ACeq zZUy!K)BegbiZ&jVpZZ|n&!kqR{T_s<;9;Vt@5mH)>lXN9ynbhK5`wBU^l55nRa#Es z%FljNN*5h>*VOy!&^}JSX#jv0WMe7q<7mZ7{NF;nLz%#WM0O1N#&9_ zIE#QiGb2Wq!eSvMs~hPTr>0t-Kw6RxzQfxG+hus94Wx2rThfJ8pKZ@Y$p3L!H#ktf^plq9eJ^+h5J|?e031=!A~HcG=F6^n zIbAC!yKtxCZ~D2k#G{9kdJu3ig2@7|Ka`f_YAXHvIukOh)w{BSH+$tpQN`5%x%Tp} zf^uXg1~&(28Tm)#!TA7iRcK&0(d0q4|2bgmLI>)%uL5@LFP}P?Zx;CoBV<%eqe+w-n&&@^5jna8s0t8sTo!JAk!J2MDW{V6(DXX@ z%k;UL?ZCNO|LClI<}nb&d$Cx%em+wopbeLnzca6}5DbjnO8>^Jl0ji2?+86o36%@egtK-6-(f~YqN!2CYGg13K-e_Gx z2XI&{<*ab_Yya^<2((wP(NUq9)@*OO%f& z&l?}D^mzf1D7-+K1*&m?2>`u(%leBQv(J&wHnRj_(A_0qjrHj))@>RC;#`NS4oBy! zK2I$Eb@@C;6)0Y^6>wPm{O+CM-*C9Up4V*mjZq$`blN5DV$XB4Uo$@~0HM(gJ9-Ag z&1|lhEx@N^8rU`|EdXNUN>r5D`{L;H-(E%(36}*B_+ZC^YA-+aStAdEo-xHX4c0vT zGbFosnm#@$aQ!1pz$f?QqjF#{cq$m~wQ?c$loZ?P`slP9e@dfZyylnqj?X z-^Ibi(8uDBZPj&2fF|+rn(&J&q&U`4Gyq)Zav!%VSe|MBfQ$^?wH+(U08WnXx_IS4d4L$=>WuOT4Tt&+w| zEdBgz&D(wn5AxfgzJN}P1JEm`no&P;oTuJdbk6=j??4&P)1e1BT1osq;W8*+ELfl2 zImJ)Ec)2SOyE*o=S>tFXDk${o16d;@Hpwfq>IjqojWsGg0{ULSBso|I-BwN~fAiBP zotz^fCFkP$2-dM!?=}D^v+b`o{5C*Q!iv-hYwdiC&M14Iiao~$`Pyq;!Q+3e@>`kY zOx$LpWTyF9Zu>J|>uo`7DaF;d1d8obtG%na0s^RDwH`pjntK2u7%5EJoN>;qC#!zw zzj-5u16T;dt==xQrQfLGsRqn{d<)7q1(*ykV?q=blUXS{lkgp%}(FK(7U|nlkf+5 zPCrWaZl78*r@K?nrwQ@m#5aiEHX`~NM0`}!fjL7uBjI~%UjuuBiHJ`+c+caRA3>K& zX^wmh*8AcwVlxCP-7YvIXB#YA__O&)8Nn1T77OvwJ!prH&IdbA3ZU6ZqA$K9rG7`{ zUfU(s(`v0j`@H){jUdb`&%=4Q^RKl9twcq1vP5OByUSylgk@1h-z?HZJb1MVMwg$N zI4Zn7ArBNB2SNr0n?2?3-t5J#RPb0lL?rNF;b` zWeiA9gpW}$c{7@AdX~&7@`PMhB0RCpW;Y)V2R`B&cB}Nb7PpaXrF^t0nx6EPZ}7+g zRt;KsWD$vZgpBMB@(X`cIHq#u1V~J+^2TC}m5owwB6Ayy^b5m=dMIM$u_bVydXldg zubIVH!LZiR>2VJ~{+q203RVOh&k6igCk+WsVjYMuV!h|{&i~$^YYoxTI=ktqs1M@E zpJDeul>we`NjVj7xIWZs`NO_R{EOAdZKkjk?h~=7T*LAWI&!SpI27Zd#%ttXpbb%c zc{V6sJ}s_8=-~ea+V25EAiQfgpHNWHIdGOX}4d_BbIf;4FHIYpK)r5 z{U=r8Nsy$sxA84Uy=odvc}+NIZNU?5nGe-!wDblgIrq&nG% z95lyAQ)l=|OG?L_{7Q=t;A_j<2$m|omz{2TD5c1M57a6#VEl*%!2p60X5GKm;&?v) z+hx8GbC6f%mP=t`kk%7IkUuV`I|};dQt2Cd=d`HMgzmPu5K-1+K8JGaIYM92BFba@ zb})nsvoXR080$!T?9s>-;r9K%w`QyAqmhu91qu_T<|E`giT^IewPk33|GOyIuLn+# zKS#t>#Hklj9d~_4m{ZF7SfLS#$8KYVy%j{V^WP)U?&wd90X}i4o}bdr?yA2MBL)j? z)@H80?x0hxquDYw5L7rk&(}##2LqCzW+lT8j3iIG`!!z8dp;6)PZh8>6fexHF7XzZ zlw9d^G*5`TT6{-%+-%~EqR*aSu}!MWWa1Ko)j$MKhpi}T&AB9%MogM(2-=sV~zH=>Vmg_js*rP=4QrpPwrnm1Hw?B&os#6@)J4n?Z@JHMac z3=l?aN(MZWPkaPE;FfAB)2UmAh^|?>XOQzBwb_Q{MnqoX{QK7Joj^MY?vPfC#qg@9 z{B!pAJQd`t90+6a{~hcZzim#y8XeT??O#o2w$Snw-yHo^&4X-S+~Mlw zf<*=kR={-WCkK+x)kZ+vwy|l5ZCCHpXDSi-fy@{U{N?LU+}ch?f2fP3G90jl(W20f z4==;LXZbE3OFixD!SeInX*HgxTR^=Ujc)Pj^D{T>3%?;C7rMjSvx|R5?)-z@%~-Mb zd;3!o4aTcC&*mzEjK014_wh0z0Bgl;M_)5FK=&8Ne@>x|N`bIn7knwU+nbueSe?HX z5V>oicz^%z4s$33X*FU_kAM!w{P5cmPmhA3kQFZ=9VI%A*5+XXyHLh2kWTwW8}YY4 zOuWkv+BrdjZ>z$ow2fFIi9s)320k)WYwkq(lS!@0ZUn5rc}spKSNAT^%{Tgl;Gqg0 zVzbF|{f#EfV(o4x>FKA8FJrKd)7~-Q|JMwvGN+9@VM{viu#y%`QfQ&u$MxEqeNl$sUD)RlJ1dYk~IeD@mngp^a6L#}wU7}!M zQ9^TxAsOYtjDEYo{V~P9oOnffZhZWSSuxwf+nl`h)0Nb>`)f<5?GQ^*h!;Vl6a8s4 z+q7pyA!oYgzu(iD>oLl~uT#3~tN7{v9C>-MPINc%upzW{8xfCRNCXeW+I{-t8%x3A z^q5Ip$N(2zPPs3UQF-%M@}Le@X040Ql^uhNDg2MhKS%!m)h!7=Sc`(F?QQVz-1@t1 z1GBmsi$aFLRlpO!aCFl;j13r9Ybxjf7P_?9#i7P}=pUoOKQ<7rV9B^n$zCFcs>FX@ zUs4I>njZa)O6g$?79Bn_3$rc@J}u9n?Q&HYVx&Wv^+aQ)SqVX~GgVkvr187JYyD46 zBBX9h`#a{wHLNemeqH;!Y?Q!%uE4+EvM#NaNL9kZ|K!^-xME#)d+pQbsbGT9X52Td z&3s%Br!!kV5)UfA%+{Y7)gJvV#U8oW3+Vh)4zPHl-Qn%nP{NNyj!7E3x1ppqhDHN96wGVsvb+FD zfV&SU2ZTam2SV}UJYx^r*DdBMR1((SZii$7%C#YzY)OJG0jD)v!&xE)GP%_}HLXum zdzz?xfGK4KUeOi>cIOy_Q@>}{hJPt9Ol!6o)VTXCZlepm&}=Ep;VPP`0;?-Qx0LNj zkQd=CX)cx{Pm1iDr~nBQ zPbT!b*GjV*hIS_`Q6Pm_E@6OU#yJd{^54Bj5-TsQH9p`DbplZTK)$0aP@kFXyi`(F zWSQ@wVuAeEm$Yoh0ITsJ z4}+aZD*aY(30OzochH>W-Q6t6Z_5ZHPN~k0ztZ_pIPA;CQyZ&-U!C$y7aLF>?0yMo=tKi^6od}td@tB;nAnlF+A@c6gnB;d_D_1-_` z>H?zSaeUMwVgz^Dg1Ox>e7~o;{#aKZd~z`GT4$VC4I0$h(!O1f3%ETq?>M-7@WL4q zY`LO7Us)MjD{vUZ3Al@GW1LYa9GsbX)Va}GK-D0XVrGFo2wbU_djMd?LK{457ZKR*oPTb?b7<3J%f6$ zJ45}BnWP(FzS|H1*+-=ppluaFkt@3j7>5B5%yrK*jro7!?qtE&PO>xY&p%n2BW z^dqqgGZyom^M^{Wf~ev7dj8N2MN7W3bvKWsx{Kri7Av-R`S zPD2GGTYwfm{=M^M#a$NP>RqBa-{j^w@{TKo-fy~Afj)78=iw+uq1s^;#V76c4{^x1 z^8`!r)2g9Dw+ziIPz(|=53@CN~oo$(v}?1Z8tdG?5qG@BF+;Ni{b$k1kIWEcCKm`wfvL9yx6{9Eq!Zl+5U{ec8cq$w{bsaa6+Hxn;!9SwC zT;TwvF6=}*9nfk)n2T)HS&pfA66ChC_0`%_sh8Hs`yFt&oDW|XXjkO8Q{bB(T`H7? zHT!SXG21QZWfhKi&%{X|v0Z#4>SmgefDZ4}oq8ZWKqRiQby79nmC|)~| zn;kTDDdNFowzsDVZhwj|{V#`7rLmo&Q-+4P{NW&VSrdw4?dEoi@;ph{op1Vm3*Kn^fIYuU5<|w)RMp1@B%4+2wjgtl zKYj5$Our1%d7o590fm9o3tH^<1b;eI+s>P4$GtD{)_6Z^_>{HaZzpbBtMtR1#lZ4* zQatZl8c!A~ao5i0^y0{_k#yC_#C^2<{5M?5z?)=IuEVULtgaJot^^P3Yroivqo=Efmo7XXe2f*o?Wc&x@_cUinIfnK>J zWeY<;D-|%W*r?RkwJWY>W*HIA?+k>kE~iAEuCK~<>zd+t*{2}Hyf4Td%wwo3?S$Fv zOcqBn1v^}i$EMOCqBcBL6U%t?)StS4~DMw7Bxn5LPTE%L@b7pyW(m~-jO zfGOw*gFJ?PT@1R-Gu%rK-98ib(Uw+d+qqkNQFH%nfxo_7vR>|RU%V}Y+KbhHX&Mf` zXj%`gD3FbP?i?_0R>pQmOrHz7QHwN$Uj+-AXUJxQCPH=KH?50(#HL)fBym+-@5zlrW2t27O6=K^&BlOS~%#P*mAuyMQUXs@ee z)tfj1+*XkrrP8+oDL%BhHTygtUG2#v~l&s z|6dGc5EaA8z9B+0sb^RK9R-TyL`Qknj&&m=S!+-2qr3U-GailbsCX4}q|E>G-6fT+ ztfh=`z&gYM4A5Tp)mvy-`^(UV-KCns$fST`re^q-*?HAq|Bt-JhZ?QHW5kI~$eW9H z2ZWLTtwhVytYLP89u?|hlm0gO&G!z(!j07E7c|d5{f@J2{q2CDPl8;>f(+PG!{Lr^ zL{Vz;9-+3^53!XHrholvJYRD_j?H(|)6z(HxWoMJ_S=?3%cbFJB{nssm~UnqW?&4p z;QYfwD$*_+c~CeAL0;6Tr8_zxF772VAjoU31FgYVBZj;S)z`hN^wp3>p@A7rf#g>j zRp{TR2s`mMiDYp9PR@w0HFKAx$rsi+>l*Q!8*a}nmbjiQl%7MUwuEC?aNc?z^{G;l zg?_D`{G~rJyd%5kp`}*b%;33`hS3w=jm$W1wCJIn>C|;ls+=qDK!MJY>jFjt^JRb2 z$|Cw@dX{R3W<>Q5d|(m^IagXsukRjsWY+Y-Sd*sfE(3NOc6+DQLLq#_C9-@DWSCjY zKBc7W$5#c?&AWSVzz@?R)$v6mjriLc+Y+aBYV``o&BlPi$+Y_VDT`l4j#^HGkp_?|=kYqCY+2f*u)G9Ip9)DSW)Fq_>{R zsb5*>ywq~BXE+p(xW}j-(erO)@?Ls4X+g4;iWd|I+)i%e8DLUWhPLUWE>1KvHF&h1 z&M}J{r*I5iY_;}YthR#-GRZNs2wI8hNfl)QHN0Fb#sl2|gI-p`3IQL0ag&op1aTiO z?fxLgJWX2dAD@{dP{R+Mbez)ruRon7NKewJw_K{4bTHd}cV5RrwP7ClHw!V!DK?$| z5Ww$zV9nwWJD%squp7bOwW9YMg-THLlEM3lbsWMSI{{EQX6Y=;_F#3+gHJ4mFEX<5 z_*JfN9D{T?euW_%#^p~51SdUW^d4&;)DcG3p<-Cp%c(+yJCMonhY>9Ud}>Q~*k%WA zjz?-R88V?rTbwQ|&MLSj7^t=E_mZsLZ{N-pQ|w!9Hv_j}SaY|PS+Jq!bH=)h2`P*p5-H|m%Wse;STVp5dyecje8#pLoHIB<#HUaYTS8w|Sod{149 z4d&mhV9+!qd9Tz$YPFFaAZ;N}i{u(E4MTVXfoX!$l8RF;mYQH|0VVes_6rMvaRd@T}wI z^`{t&ufSUKa`Q*eH)%;Rg2ayZ|Bv(C`Nd-zm1kF89zIR?zjfYQ@atT)4hrsn1G8=; z6?9BaO3ILh`pqj6%sZCV57O24bN2d4UcFabj4M8iL+_`f$+)_EBE}oNEPNd4uLkb^ zP%6Rvo8aNf1JfLbgDYp|U}!J`ZDM)IUR(Q*$sehpPZgpKJt(r+p_>F_X7^XrbIsi> zfN6=sMzrth%Q0#Y3%p;v-;u;=otWoJtJFUB5gTU_ zZAcsxklQWYo5VrGFnC+dp+K5Q%`q?JtByV{)8B7!jbkO$oHs^L^y#e7%gopL{EHsk zGAA@x%p_t?VmHgW0+4kvn8ee$&5%tRMn!YZ{i0X8UvoU~Jl+{m^bS)uu#A@zvt|p# zQ(b-+8F!aA+*f_q{`GU%|7vOb7l9~IFZpi|Splj*$oThWlICLIZT$V2`dUGJ^mQ~ByzSypoV!X^ z)E52yQ~x~>q#RbMD!h!zg3WJZ?k(o2D(%JcKF8AvE96?__W*>o-ST{sQ_`%STZIT3oO#V>gz6i%GScO80fkm`K4SD*$+7N+3wj+tRLYgw}}b z1I4wo=+5h0Y6$#-%+^pf?e((gOCH*tkzHs(uk+Uf&>i9qZC8=lmYHRWyUA(2;C7r0 zF+@FlRUZHgvV}C!QUSZ+rLI6h@U_S?Q9vu(a-|T>@h!q>hZbM2P@^+qv`Xh&LR+h@ z`0=_O6vvhh%!zDq+~(xOQS7lTCiB3`~rF>Q)#|?$2k3(K3&x2 z9tc8qK%Acaw%8c0*or`l>G+*%G4xzu#1TGBc6yah?NEsp8gW}=zldG=)fa{p%^zcYDK(gJ$igl z`^+KZy{Asiv0DSP!wf{K6qG8U38c(X9g20!Zk!uFkWsd@=XXiY>bMjBXa;hz+`^A{ z`Lo5(mK`3x3_}`PpZ~7!({`ev$ZO;7PvkDU*qs?zwu#J_dNfk@&{Ec?Sz}nPa3U*N zYS>_JIAHEYe)}jt#h@nt`Unh!rIxBgtlPk&-K6VL)r(R0Hmt;HrAq#NEyCyIDGLhFNN2oy(R?=NSNh z$fsRZ%JNss%LGZU+RrqX)B)+vx2pkghP66hr?TyP5He`8(DInZePH4_|1ThRw>p=?42&a*(fLrErhlmqc*oZIa5Mo1^|)ZoP|569tpzdQFi<_1MHA&2+}pYOIr zvt{m;x<=zu|A!FB{6z;1l?kmq)*CrJv|#@itAPEtmo~{m^r?1geC3x_BfxSb5LK^^ zt1uuCh-3oU+rs5GJgZrYnci)bnaQCy5;YPQHCE6w%|@KsD#}+~E&F+=cU1Z#FktYC zYuEc`z=}$tSu-`?L-+AXsk`SUKA_=0TAbO&AB3cL% zgy@Oh#t=py6G5up8H}hAEz!G3L}&Dg8H4DIA&frCdj$Xce(vXezQ5P?K3`myIp^%N z&suBmy~=N`RWW)S2%(A|`sbD$8aL8TFS=jM9iyMu7(&4Ih0RMXKkQu8efjH5{twtv zZjE(yG1xb*AUUew;u~5f>x}6VJp1hTm=u9j?}=y+Ds@k@N|dkc*tECw&64n{FqJj| zYQC1|7@aus#|TT+aCi>efs$G-4cbN=qxtBEi;zu|*ALI(!jk)My29%bvYbji^|S^m z<(7sTUajDfPw&dZ@4N_MmF{EG0-_66p?_1aic>D*@8{z22@^5!N{fF@_e!8*otM#o;k<5uyHbj;N$Pf68<-S`wdI`FQ-DUPq)D@ zG|q}SnSxBGi0iFuN`L$j=3~Md#@aV(-@v)4Q8Q8yT!Q%?r*EbgOv;1xt*R@PQ1|V4 zFKU6aZyFvh&swc<2+TP_66Y$j>-VvgwUChYeKX|dOt@>2f8dUVj0Dr_(u)7`sH!93 z!$e|0OFI|Bdz5KoMDn+pV~1FL3lMfu+|NJ&l^y)tUtwi`!Zl7 z(!B~P@rqdizN?1ebgsdzvnvDQCal0&Ag*mQAGqdPDpqod44mY$a{8ACWT@6PQ{D=&8g7jG*{)m})){!pWT@G9=Y zyq|2U1W+sKR<{;X?_aTtzAR z&lNAx3s*=jgUkW%yj>_cKqXN&fh2p*2GO5DIhXr!DKZv;X#f~TsI;^VR_L!!x}EY; z;Cbt^Y(D-d(gc9tlt{O+qQdV!iSPmk%IY1GINw^yZG_mj+zQ4fZN|Iq4y^vncmR6UFBH@I#DxMNdf@HIiYl|)UW3v==km_Thd+q6?iN{nVRB1 z@5WSwa_eNO$=SUVyLYr+SA|c;J{z<D=e1>0$$7XhfRj}m@}LOTRp0f4m$o+KnxnV^cY=l#o??XXiM^zXdC6`zp-#^Iqz zKO6L_rvt6>&qRYxl}HCqG4jjuW93R21oUnrN*}^M9sxDH-<_)AeFS&FZ03F_bge?d?=}mJ|V(hPIMI6>J zyVb0T{2u^~09Nv&0;J-a#| z4{T9sQAz3BcNHb3TW2Ddv>5ah&s>VA-)TVlc2-v`j^YJ8Td+&Z>o>)vg_orX{$s-l zo}&t4z?zxVy?&;LI0{|l`s%me1; z=KgfaNU9!+iUK?J=}!u3WJ*!T!Vu8O+mq$XWmsaLn2#xzA*9~2ef?L06fV=HkzS{| zbTQ}cu|(xkxi1Yp5OXr-D5|}CYq%Ten{YMcQbqMp$%(`UP$^AyJeIio$FAvQ zaxRxzqq!=Q6_a-r9CkIGS0X2D@?jZAqWCr#>7A zK2sz1Ta)8wj#rSC7)imM1ffaeT>ZNDvSp?<{?VjoRyB~LvQESs zPaO0*<@MK|Gr?AkQNyb%wi%1&P(15Q*MrY?pPuBru9f$x6mjVL1{Ll&9>gUK7$o^7 zd++fk#tuEBl_JH@V&oYoTeGR2TI;REOJQMQVFjd)Y<$z|2$c2RXiqjIDCM~38#IIg zZteSU4!RTShn}{Zj8}=cb;6zbn`u~v;@MUKtQ3~7C7exMSK@Mn{K2k67-AB~v?I@G%&I3ATRTlR zT4!lI0OCgY#@WstxjpSyH;?z{mp!BbAO0@L!zEJo4FTKb?uaieHNDd8N6bwYD-pEn zxNM2^#>Igq&KD=-3OBZ0|4TzUQqsr68E&%lq}%LNCvPSpMW?XK$xV`9P7rziesR}2 zXR0$%m(ujnqYeDteIR}8`sOCv?Q`eOF-Ex21RSwq=F;2u?@MEGjIJIRwqIQ8-ZCd) zj1&$#x_XM#ULb-?+nI2aL$0fzjK{mo@heURoI3MKfr2gs``=DHRp8~{zXAn${&T)0l)(CAzglziWFI7}Q)Y$Q!cfhp_sP0|0IasVA2K>sfc}7tARhXO3B?8-_+7fbIno=9$KRqRPXh9O#!IWE;~nAOp81#XbRplk3E0fB$2+DB z3jvftj|88uzBv+>%U62t(z@4@?FFbDagAM=a?Ufx-(cQT|28=fonn=H8}kuiud}C- zaV{i~pFVn%P|3@qyWY?t&2C>|@xK38D%fRCuf5aqGx77XiZl7UJ6IW#>=pJc=RcwW zsQzL;O8n?~MFoW%2dna6y4z~ShYSj;1jY;AS^Ey$wWa)U9@VQShQsh4` zP`CmY6l-6$v7QJ(8tFD+HhM|6TK%Uw!>P6j(im&>m1lt}>sgpk(>}T9I!f$Kz+p-8 zm6SvJL;zok2f%%>w*$@nRHQr{mO;X};)qk$i5tP9ep8RUXSTWe|HtPzQ6lAeT$%|J zCl8eeKgsvH+m^_ivs;a;DAi0Dprg2VILR-M+Af|iTgNHEo;ZwzZS)^fO`9DSbK0OG0Ihmy#G&g70mME-hbPF=tr-mzb7wXG4b@bl)8j!B? z`;wH&ShtO5?{|*hG5ECA&&w&JQ0F+n<3`SaP%|`Vua#pd_V*In#$H8Pdn zO%yv!Gf6^sO2g}3nd5o)Fgxp!e%q_)pvlf_<354)E{!*QOIUY%mHb!)6GT3kJZ+MD zzBfIa(x|k_lta`soz9(Qjh8>z(qI~Jo`3-0Vya7g?8|lX4=>WwPk-*<)$sGHt-$ty zutB+FYE^md>eDX_I^NN9k;!RgwKL$UHl#oIvMNJnb?d-t>d)qMS^Sv0^>$bdxFVJu$a;@D zklp&^C`!d;EQi7NzI2G&A?_$->si7GcG)aZ z7)NxM_8wq4L&@c1svfC(De7Vw0BnMpaThKwCSQO1$gu?^o28Myjl5yG`wr z!Urz)Z^oQOZa#5X>7LziOcMS2hHbZ#s+hQmoh9f=YVNxk#tpQA(OOtUsC zuc#yHOI}WA0y`XL#6`whjVi-L#3Q1A@?L4(FoiFQVS(Zfd8Qc1N7Fau#TIyr8e}$$ z<20d2Rii8~I#nnqu3oG~-e>L8T>ttu;c}~GU}_95sY1MKP5E`WPm9<&4;uXpeF(OF)uf944*enkXeZ!>+wC_BjglFU7kR+F9ywW_JXM5 zy8yq^fp9;5VRuHlXn4}ZeMNbr%tO0JL(V7;)P;;=Q7~o8y@|vma+OP_L3=Yd>WYopWb~ zp|c*gMzegTrJc|+(+xiaboJ*OxiQ<^*t*HJZ{LVYV+0?5Fs7>4EJup-t*sirELJ`X z$%kB9DQvVVmOU9{pf$|$qGQpUyOVP9%QFvMHA$qM`>qlbf8(!K zce9D}JI=~Z?N?}F(e+W*P|gc!XB0WPDYVFrkBwJXdf*#<1|~M(CHM2M0GDsu4&*`L znT7CuAL<44-E(n*#@B%+02VdgWdFp`5 zV)L$#dT^6&NA?^;r4aWWY_0+JzBhU&NY3cxH!}?ZY7cCFQ(i1;bKlv9|Wt^ z-e0ci8A*fP02zCfnkv1R>9n7`BBsx@I@Ja;mlnS-f3Wt!3YlsuP+gh1*03xfxwHIu z``JK?hk4zF&gb2jb7v^0DqL$e=WY-;+3w6%cZBJt6-cbje9^)hi8c6yF=$$=4d=DD zCbQY7b$woUy6C++6=|4Ve4pU6_2QC?2G8o9D%CN+r(L>%bn)8`Hd^S zOqV*fy?`f+5mDnv$VN{Z`BZkQ`1|swhJSu2`eMtR`}_12MI1+X263GRh7OE&tmFxB zU23DqEa^4FhoUQ&%EXrUr-qljW)Q%3R;Cfnr3o}gBK)3uZ1sRyiBa1phA=G@FRZU0`6WiKq*SqAHhGrD{yZnf!-R?e9XYYWNkaJKOxJxj2mjGY4bzl9`$}%l)BkP7 z9r>WLRR!{(Jl(pFc~vfiI21aLq^NGaSR{AM1dU(}-opttMCKs8x>+G>vutYE9Y_|C zS3AMGH=qR``L$9`dJn8qO8Ogn;Xb}=8uhUxbq#*q|o5>fxgtS0reSUMS zXwP^n?VAr`&zB$_U8@6=NFUDVMF?>y#j0bbz{9jf3qL(5ZXV_Dxej^4ehV{<|M_WC zF2cav=SN}h`@roFHkjR|;kuT%=n`DtAQe8eV8)pD<=TucxUfud`ZWzU8M{dDJ)4a# zTfKI9sfI{GykrJ0-&w20#zr-u4Er6UtNqrhwmn?WJ|C!!sr?jG_ipvcQg?t3uLg@u z=?{o_W#PeB`ubuNyq>Fmd&d|aGMYc%4=rGK{8bqcLiDLMaWV>@3&sDsJE;Vi;l@AA zDw^dx4<7Gq)c+{#hpo-8QkxdVx1{JmqxaVM6&$&05oMRg50$VLFN(}6fx+8BzmdvFU}SCt%S5DwdHC&T(X79BXNtd|v~X*Y=0?ls>rm=35P zg3qyC+4a=T#3j3c2$?gz{hZpK`W!^!^sJ>xQJH*sboWfCu}Lg>^$+6BC}+MRF$Nay zGb}QFxiUR_oZ86_2=-aw0va8vq`hV+36~X<{ZyIn^MNr61j=HzBW!FaNqSW{2~h`*l*n`A9bCwFYEAjJeiSl z;Xx+jH2e4VDzJr)4Jus=#a55|o+W|MK+gJq5J@j)x&21O7i@3z+_3c6u;Dp7|Tv4&S@rAYh%f!moX4u~FU?`hOTXVLi!d^Da3tZHiwW_mv;RPr zBKh6e#o4G1q^nDf_GvP{ZDr(FNxo@`+fkVGHG3n~@y27bhm*i0Afg-T@bl&D!*e-W zDN%Tg$1iB9A&lMI8M7qCj3gL&w)Z186HCAjh`SpKGi(K&?zHHO$}Nw2T}4H<;AUwE zL&gYu{Z3k+lYbe{bCO|xIm zYOsrGl$#*+!b$h_G0aO_{mGWGG%z;5AVzZ2kusTl`J4oh&vX ze$A0U;#UYXZAG@|J`#6tEJyhvhhk!}K{e0$k?^99fSF;&qWi{%mE<`xd)^31;tqi^ zqeTsGM02Z7L9QfuRSmiM)@^COGB>zeC%QjGQ>epFtj3eK7ZlXVt8nFYr?FTv;dxB@uhJtc%@GDmI6Vp^3c#*b0zU%Wiy*iqj(R(Qj2ur54Nt{JEBAiADic^SDY@OdKf1Hl zeJ)E-kYBUVeyY}e4}@tX((9vWt`~pt=rV%Sv*>lp+6_wzU#l#VkDsor^djiVa~fXM zs><|8nYY_OSIENxNLLdXpXrik4m66CJy8Y9Sb~y9+fBt!8^vw-2%9Pt+aft#zq%GW znvaof(#(O24m1Qbwl6r?HOC=6m zWIsr}EXSQvQtr~bB*c{Lw%MZD;?omAU#5ZDvw3(%ap+KQ9^&ThzI3cN>(C`Q>}=s) zzVmS_`6e=*x`n?b?cbQPM;x|kl(HVZrYq@kFOn*=epuU9(aXJuUhL-+Ui~&u#h677 z^??73)Q2Asknu3c&4y!rdn2yiZ8GwvDh=kEtP~GA%+n>yTm7@<>-82__za7QJKr!X*@<77ma1oIFrO83)QI+R#QI$d08H@%~aEWI_U$m9#6i`zL@njg;fWhiCerJt9jrJjVkCu_w(eNMh`qu z%264qwf|7NzY6y^w?#)~RWok6eoj=;YZj^;PxiwhSTn1W} zhYuholy$k_k<3VzTW*1gSyMzwf?u)B<{F(P`kVge`AfM3pcJFlq(PR0b?^QHQl6NO ztgQZRGU>}H_bWM^W+F#7^|GMZL$O4|(?D$sOY-&pLf-q`$>9y&Lx$&8;luJC2mT(Y z3q&~_c+@rrB;>sH4vo;d5#OId7{4g}B=n^7-`f?=DT6=4qHPON* zvqwNIrP{qz1-&=lfox8Ov^!K zZdJ^t1NfbN`>Yn}F~7T?D8T7(nvXv!1}|#9rumZ76%5ha`I(KZ4HHb0f4iQo24+h2 zTVs@VXkaxkW~sfiWnBZy!x{a0JhPsg6!sylqHisw-Z*dTd~@+rf2r z%e9w8I%Pa>B#>Jpg{Jg?Wpmh318LB5spl(IDVskc^?;>c(&LqJMeH67A3xNEey5fA z6ezaAY@i9r{xB+zjdOslyM-Q%Npi$~tq(^h8ZwnihiMt3QSP*TgyD)!+WEmredDuC zEn_mcwL~WONB6-W@57lF#ZvbB*X4-Niu2wA+X;00adj1zw~RfrslprPw+V?^^SF@f zOlz`z<%8?1xWX}~Gf@QA-uEAuXFDs15(jf@$=&%Z3z@=2mE^Lj)JAu&FfwvPUO2$& z?BL>lxKH+`nf_cW>EAZbvFEUI4tVP)Nl9FCiK{}UlpOhJPX(O6+~ZxNji4z6`+dnt z;x~?|x-Ng&Y0YZoeIi%9)LsJd_FS9C9l_Ez-_q^gy_R};8S$)nM;17{YIpMLEF0`> z$`XTtnLtB=H#&uFZRyg zxPt8;YOm9Yj}?j~_cbl>9jTD+;GW)w_6yxN&L0`7K@|LQoA4s~Mkda@K;(Ywd7pn> zP@c=Q#`AKD>~GQjohIQeU7-7)jt^ejr(UjPO<0AOOeU~i^*vlbxJEPh?Q2Q-&M*8q zrQg+B?o!;E^J4v){UM}(y;;_o-Fa!Hc;|)wLF&UUd8sm%Ut#|QDc--77uQbZC+Xu| z-uW?#2`4Iq#rK8p>7zJgpAQ-`eCM9iOl^oIU(l&lx|^%VVPAgUTuU|V+PP1pjR+iH z1bn6Rwg$`ZQ3PStnyz)*M`O}tvL+{&!Dc_kRo#22@G3gV=J%_gOAj#CDTh#_WhLXs zB88p>I2B`YDP0B!1A6dplaN47B+539INQPEIVM264}RGRmo{Gl5&$J)$&I$twg;U> zxS^?D(&DB$B`p%aO-7Od4Q3mNqO|T>H!&Uu?oN@CP|bxl`U}4ut+3=eeN8<1h8_Ke zP!^BX0-tW2y}1nf^MjV)KSMsM@NLKGd%YG{+DF)I+qb}+Nz9A=*_a37(i96ojB9!U z9kXE@C&wMeBHXQ|o2UC0NGh{UpJbRjFA5YAC?ii6{Xygt4M^6!U416%z-2gI;QYVE zOho7?*0U4s6_hSgumgcGG}dx)OAvo5XKA>1MHgMV$lemp;%T135eNA)_IC8&|1{z)qT*DSGe;rmYh&z8w3_ z|5&AamTF>+zjujS@yIyp(`6rM;5vZ2-IQf`a1s^&S9GGd6fn~btG6V)k8P=c!eT;) zf!z?-ncFA9hZDiFvImsNV)qr-)nn&%y}~O-4jG{+1m8#Vz9~9jvu;I`?wDqc8GgLB zB}Oyd#{oXR^QD4?V!D#DYOsPO`odhC^sUrqv$iDnAK_~sUP|M|e|^u+o* z688?76Y%BYv-3e;c$|ekoUr0J>Z#yz7^s|8;rmY>epd&0_`gs56C6I2xc{Zq{}WO& zYs@aRy1Tdg{{8zHg7+=~gk1R>H1F0N--QOIV7%%7*{oV$kp57q<;GPMMr^QPtmdlX zxjnP^%p}LI`DU5G?i0_;ijn`-87No|hXaMAPy+aFjm=_|331|%M`PrsV76flqa3c35k^Pet|JXfefAD6Sxn)P$;u8yghQE@c18t{V?t{|f{rS&@&LN%uGp62lY_~Sw-T-H~tx1GW?wPsv8QOd2> zX{5fr+&SS6B!5a+dp_k6kemz;b4KmSvk|_=R4?k-mgs%*iVZC?y;vIjvKfrc4#qBB zvs}nOx4OQCT=eMr!F=C5Zr+WvU9>ByvfHvf1Uy~vya??!=r-iG+*cQ^nd4lC7MlC| zX7NQv^E%#p1fngElH4k}1Km)9gR!2y@4a8NU@M00702&*y1`+Z_d{i1X%;DYDf8a2f z*ZQG|<}ygx#c*C{_2;i%VDOtQVJz{1=EbXYM}#_h2G9 z9r8^#oK{R6*;1e!A10eQR9`b>^x;Q0G0&D;ztJOWK~w8z;} zKG{DzsReM=3sJXiR_v8H@ENUOB#wI)o)ODjv4O)&r3%Pw4p8Y#e>N(VxrOnX`T3yt&gF2CYEX!zCYC>pu5hLc`odRJQNWi{4AYIa67)=CJ`}tgiUUs&Vb= zR9COi!xa+&0l)^WQr15ckJagyR{~ftuone`j5MXvfwpRDZ!7XHc6Bw-hp|cj=0i8U zp=BwDW!%VE9HT)&BE#jj^fnI4#zdAIOj!MQuS;vabPfqo+c}qorPQHRr8$82+0=Al zwxw_nJ^DD5)O5ZZ0e<4rRpmhasAL&eox-~X5~V-C=j!HU*%T}Yt^Rnv`X})&~Xy$G<3P{GBV%YEhU+{x{DQxD*c)J^r( zuYfw<%n5tN9#)g6bOO7SMb~<#U!T z4A7-K8INGJy2Htd_6U~n`UCD)o`3>r#i^yvm_B#gPq%$Nmdl6^N#XLl57i&|V#TJi zY^PLo89!Rm#uPp3Udr(9y#wy1U7e#pD4A~n(2Ra!I~s=1nxPW$f0m?g0^zf>oU{=9 zc?MV#30Iz8fmPUndGwn=8yWI0SQmOQYrZUW`V|%B@cqxhXTh`6+SOu02Y=Nks|O@T z>{e|SlgD34As8TT&ddXZV!!m?HbWK0B0FJD)z|#)tXg7>6u?botIsB1cclwc+qoDC zd1T)ePxbjJUO&lC-Lpox4g>h8q$oxp*xm z{&g*wg~6hmWezBc zTi!)PXCY3XeIF;;1Hgz%RFC?RZLQo_<=F9NkQdd;Y~yzhKaP+*ig|L6+eSx_Adkhx-e(pf-6?Vzem_2Vjw*fHklbnI- zfuOuo<}E#i4EuWQyP#^G%*&iQ>M!ro z38^Pd9;x7f)of^gK8S9d^B6pu$rBN2$$p3G{^y^KG=EjCfB&O!h7|Q5rq1Epcz~_q zv(n;!049Nd_1Img|6SMr`>eue;P*`RrvGP#r%PF2{nzBJZUnr2OYu%5T;axzF7uLy z#{kESGZ!z?bKJfD_~J#Si(M}&PXqALtjsp9W5t%8j)CDhFO7wzCC9#A?e=dQ|Edc| zMt$qE05#oc76uj$z3jJ|pT_4&&y+BcM1f5_`SPG>8iN(?H z5h|oc0Vyt0_bjwq)HkjTbD;G&n|=oSXNg&|XZD^^5<2Ehgg+^I^_xCxa^@=gUd)5P zAXxUR0lG6a4;nJoYIp|77$`oPeV?K|+RFI^(kxl$yvTRRLXTLcU~{Qw`S$++w#t-{;G^cRaW>K;do0T>G=6<@D@e70dL z#C>sK6k{`FeVwFT2v6m>)%tO@lc1gbKRyS86=f6<@q^g^PIzyf$)a0H-EA?2Q~1Zv%kgXRb9EesIiu z9DSCgMdJfSZ*Omuux(hW+CptsgItuLxtVXR zgfMyKXquqOm)6mIM8|_IDYHBFm#e!X6_EUvXUs%E|PR{lx@dj~QTVCLrEKKz|-G;~)2C=d2(}i*LTv})Sk`G&qGcStf zq0U}IAt-WVeSKoGQc(km^oOnLH!i7$Zav!HCbGe_#BXib`(#79r`{cubw5~sAvfmI zSPxsTEnEHQk&?7Jiy%(NBVdwahUezCe*2}6pN{?B4ammJADqX;ZIvPwtd)7{kjPe6 z056M1cKjA1>z3f_CPAh*_h^!Fq*a@e$3QVTQsAYcN!Pdg$vJ-;@?M3pP`pyCVb^z zrZmu}=JfgOs>8dvcDti5KE6DboSb%_e~*a0q7OJ!*@6D-WvGd2QJm>?^yu`WRFAh2 z#l~J<4v<;Ly1kYcZVfUJG@Acv+3yJdWtHc`xx$qkKC&(-F%0As(`Mrr z@`TrboJm%56FDa&V6sKpmLgrTI8w!{N%#%6*ct5`-`-yp_y0oNChSv-;T0ymiGHqq zY8|-i!ltuY`^6JuOl2(r~T}IgKfr+9eUkA7zjk*Ys7{@ z)!OS!1~uh4#Tpur^-uIq4}_-lQ2VTTc$|X)m#noGKNJ)OTbp~_n}Z|F@<+6smE}f> zg274iAI`I_hh5^+>w)Amm3_v{Oc?u{fx|^%E7#@$jBICu^b?@#$n#ANyq92|lu}%g zArO_s^5H7-qqG}#)zBVxmV}K~`aqMG7ngbTc)+~mBVZc&oi%99L8!QAbSy5ueaKww z?TqhYkRbjl2|C7qX0zW4)2-=&`EE_fQt3MAbgz+W*zR`RxO2tSFA`dSr5B;@LS$F@ z;})!}KS~Q4u4niiO!CdHISqFjOJF)LuH<9QCkuv4P0ak(y=%f}G{cKA8e6c&3lm*{loca@uml%Gy8e&wR}m{0r}OsDi~xtUGM_2X^C&Tw-|*VTHmh^Dyj8 z?8Y?%&{NL)ZyLCL=<}|Zy+A>0W3xOmdfnz>%PHw$Mm7c}-c?J^MZX{PHOBJpX?fGB(O`e7KQ!L!4t2xR;)_2$U z(ohM$E`9xk9zw6jSoYSkuB3hmvN$Xpjj2)x`kitOV_iIw`>~@AvDVzqWwtmy6he7AZnPezIzx& z+{Am1MG#plTg^@T|Z-klW&i3!q&~|wM)yb^gq1K@%ProKR zc{;xFU@4w0t`icw4Zse6+1o(Q)6^}k5!9d}p_t{$!FNFO0dJH z=Y^VUp+hGK+Vx*FXE1Q)XVgID{5WJ6b;5?|0Rh=yHTZros%0dies_n;=FazyTLy(D z9t{BeQ$8O&=TkRzx9csc2+Q8Ew^1`tXl!hD*WS*6vMa~8r=6;9eo&_B4b_LsUy1VK zQ})D3e*pVg4ldD8dWeu!=G0>5$`at^0HB{2@nMfke+AE+-J?f^+IJ^Qf_A?HmaP@x z5cY}FjL80|E#Y}k-=eQ_2>P6NbbgVNCjROS)t+Z60QeLam*U9s4HqcCp#oRINjmb< zbS*Q38!R?aZUbCp)RaZ#>W-ti%Bua=R!H>+ZIB%0C3VB z@l9prnp;jL4Q;jH7i|Zu-j9+hYKLZ8P?AKSgL1pUhf6Fj6;1Hkf~Vy=T1lIc^$6R} zlpDbUw{Q7+F2@3imcL51lkO_5$^OZx5)+xL(oVc<_BlcSDftFl#Pm$bfQd{x;v7(0 zHhMAJPqgydpAt= zeXwpRXPJ?0=#!eOrFnQ_Z%_86G-<8)`!V)==$Hx?%5$XN0Yc}NT zeRt{B<{O01Dt^%K$}$VAF4y)_zo! zud$ihv9p3nK%WrXO;%$qt>mn{EA3}yY*Yw!eR|*Lx^hNe+6T8&vs6pg40Eme=%(oZ zuCHPHqWyc_rcN7)F>TxNO%bgV%k5A_z`<8+=~TKj+CHfN3SflB*0`k&wCj-5;$5)R zg*ez^P|@Og_VPP${r(VVhPLPWe7%0Th|`EdGgQ`0K&(J@?KP0c;hJup_8V~i8c(v# z9NdQbjy*@s%kRXP@_u0j>au!!jawG3r?$|D?3eRnvOsxZg==fv73H0C&k7MMV-LSr z=UdBxm;B~EaoEDW`joY@Ln!l7)qykP#)b>ZmoH03TD&&tZ8>|d`t<_$7q5z5coZcw zyCClif!aOA)bA{7tbf*T)bmPV`~ztsUv4k!g14usvK4P@;DzmnLL@z!&x7^`(mZr~ z)!C>W?mACg_1-Hmm|9RT!%NMwZMqE&k&V7GV?+3@CCVHuekknPF?`zaT-G?!v!e@& zmtaIMJVO3IrGbm%ouh>|a{hZKq*~q93U4qnHP0WoML_R8b)KiNp8f*&tnf`9U_O-s=d9OKt zw_REj#AbCD3R1}bX?JhYWwas5Oajo|g52h#kCPJ>O%xE>_B}Z{fLB9ATiX$HZ~&B_ z9KCu4D3Ice@Sjmh)7S(e+tPKey=?7=P}VU`DyIkq_;@1Bf1lev=mkh1Uwn3_^a*$$ z3#0_2jD7b!g%?=I+^Vc`mpukC;PY-zSm!9~-tD{I>(f<0N}$9VnhTS}ysKw9iGV8J zQaOa{hjOKKF;`Ub(7M0bRK10LY6jww0*av;HJ%_^YqY1N2~AR2m46|vhP>1p@SFsu z=5Y9wQ4v*1Jse~%?wF`o-z~Y0j~=)*HoR@B`#1xu-Y31~o5F9JJn(Q|R#@z{Si=xk zF{IG>nQcZ7x0o(fZfsV$o$C>xb#REQyUaWz+x}Jk?c6qRczFSowK%CNvi}aGPsG4P z)3ST9%A-zysOlI*30v7VSZZtQT#l-))XUXrEA;H^7lP2rR&PX!@k_N{K|rT{!nfhJ zpz1>5Pj($26uw8^JgDZO6)^*Z3o5m&$7;vaP3@W6m$jCaxsf>VT1K@g@ zw0V1z#O$u8pglFk=tE{DarL9G%1vC0WO|@Gqc3vm42vFpt*y9S2=O5+YH?kiaH=Gf z?|VGvmB;>|?7z{89q$j1=DAm9(O4H{JF(TaAv|>>y8djzR$t>I7;)XRYQ#O!r4Up= zp&Q=XN@47}=+9@~5#Z+`v21Q`AKc*s2#CgmXze5OIh;w(33WXlkn(`|pMK;Ctjzv*q~0k`Ake zL-)NWp7`%srsw?5l%beC!7Xww<7rLJ&Fb%>enu;Ain!S14_ot)j0F0>z9wRSNkFHm zu+T00h9a*G)D(j*ZoOmUN>j%1KIcV&{Z zeKFCkc4nl?elQ!e(;%k0WS=zeNWZ+P8XnOMDT1tD*i2HO-X%^b;vRtzczT_wO`X zt>mYPpUOQ`w!)zE;b5~7ws@5~34r46i5m)~k}Yyz5>y0t>$*kQJhA@a{J3sYaD=l> zCpkaTzJxPr=|&r3&Nnql-a1#@N2>?!ASw%$(my*wNGp*V9y67OX>OqRf-QIcd~3lc zP%dHI4O`e;XqQ@s)&rV1YiVSQ$f1bKcVj86+s)=b-*J(z7a&ivwe3*^Szl07`%|e; z1~p&!s#B8v5L%-B^T}^}&^&ADVlpGm2nx|rOp|+qCtTK6JG=B2;0b3H-^OAj$CseGOofklJaZ#)+GwC1HtI% z%3l(wvZCbxU-GpI)BzA!jN)Tk5(GfJrNdQ+=>Sp!Z@U$HK3Ym|Vv$C2sIAmleb z-h_R<%YR1?-+NE{X(WII9_BKV0;O#T%X_RPn4rpIv0}FRItVZT#uf*fGfsW!>dnYV z$E!^kB;{nHMf6*w?fPp29r*%OJJa0QLRh(xatARZJ=;FR_0Ttr6kn==XnJ3X5u2BE zDh=8rp0Ucr_{m&p3|jlCZCDRn2K0G}F{#4N#y~Q#&fA$b7KN0#`;-Z2K+5x&3B!#q zeXtX2T^P}xeNa1UXIHXOO6iEY1!3sTy&w)o-zb8UOu8qZ2kh?@eA)>fc+%wQnxe65 zT31a}=cCX42l#KPp};(-q#M|?dLE>aw5<$R|kMnbc>Zuq~UfxqSi{!Wqb zpZGuRePvjb-S@7fluC+74Im&bsr1kwrAW7QNSAa7N=pwVptOo8-H6g1B1lVj=TK+Q z0PpX-C$8&!Iv>vY^MPw-o?Xw5wbx$jUibd&vz=y{YVFmti)xP z@IepD`EEC`qTS=b^ZA4K@u=d|Yrr4db8KvUt zZLH_2!zLsbhUq7_du?$%o~h>^^0@&$lJ+!WmrW<^H}2;w8nOv2R6; zuTwlDvT?}}^>guk_d!u6X-I6*Sr`3XHuP2b?i%LK+ld)W}hPgyu@tzM% z^9RKD>t{qfZI+If6Hz^4^SyhHcTYAFyT1{Qr5>c@6Y4y)W)yT=FB-UErOP`g2KShB zPNDSt9*Z4Srar69+$A8qh-U&KC+&{m;cU0w0O~LQg*sU@!paAw0w-A<4^{e7!TH`iH-K>#&hY+VC(_$yu#triLe)hJqqkrJXktWzL zYh=TUe&vh_=;?hODoraqzLC|!lmp)exLj%|un0H1PEOP^iI^%g^=zahkc}A4l}((! zXiD>hFI1)WA9E#^oPvJ;f(A^Dp&L4IMJ zRJw1!z>d;qfM@=mq%N!LZg7G~9h_?G!FB`H%+;`RGYUvf3WT1(rs+fFvYF+IO%ELf z9A{;rBv@y<=Kf>hxl&rovD&`f9WXJ-!U|i2Q^ZDEj(XzJvu36_t!ksi*!3XX?zE=w zrA)%2t{;5$I$b>+4ZZMyAevH|~2PT+s|av*|+EA(+7 zK9h10f+i?1#!sCJ$pRDP5qxtGFGlo26~M>3;a^dK9s+QMx|L3Hz%kv}(v*_(fo4LT zhbJ^r7cqP^j2A31BQ|0>pbXVV2>0syxHh^=_bP3MvKwuuUCCme#c?JZ7cH%%qck6~ ziNH?VU6w&|eaqqfq)kJuZXVQr$nc5=h^y9bpCjV!!(1{Z|{ovjmzz*c#7PNZkEQ zTz~Pf05Idp(sue@qB>9s09qfPZU2hpsrw{JabZteYRH+<(TbtX-sUUq*mm5_vt z&G+=UnnKX~o?qrSmW!oK3os!hm+pM%Fcz@G>$Jz!inG_v(x$t2N7Gf0%Nz_o`D%y~ z5*!?bi(@dw9a&7nv>vbWNlDRMt?Cp36#^V?<2f5W{8Ym3vLagWfW=Ao(dM?lMArw_ z#$=IrH3$#J2MKPSS)8i{E=%qY%*SzUL^>ssbd582}pP~VfL;w;;T`B|p6f1c0XGMa$b=87}b zG)0=8U~27Ca`d~td}2aS>qU=~*UmeQ7j?W70IxfD3J4ERX9$h*g=868kPM9eP=rL2 z4D-H%!u`acrK6KHHTGWKlv;EkRyfb4!SzKu>aaR*-HygVkx(>3XHI?k}wn{gaV zKj$8qY|s@(w6RftE>|YXakLEl3#-kJ4pbp>ts1P?O~}>7__(oFvSR)bWXB5!piQADYBUtao&}yj7aW6c}xGdBHITGvx161qlJ!mP@zrNZWS z{=T6)u=aHoN75=m=mE@KrT_UQd{9oM`ye;_x@GdIQbC;Ysb9zFG=<+ca9P1Y9*K)`Jvoh$@sFY$X&X`IH!M%gz?Yr1b%Co;p<7JN6q zPv+2z2LLt|*to02(2Cx%^V0q#BNXB5lFDpjoYH{-l zxQ>O+@Bo3=Y)+{Pb=A6W@IE!-;c%_5E*-A<7CDx`mdHxUf3*IkZqLoE|GH3W-(W-rpRXn)C%3lyDj&znE?9Q{ zqMTokG8L~g<@l8)Z`3n@#xyp6IDC3cWjR?@oGRiy@~3HGQePO}UTC(D`3^(8MIksO zB!MY5rz7A9*SlG6v;XwRLN^sTe0Phk9|_7ksj1zitSwY7W@V2~AhLK~p`6*65nC`keH!g5{mR=j zHb-{Xl?uRy<4?^n0|Eol?Nansb}i2UJ?ZwR>F*>Twva8Xzk;g53}=P$Q*zYUP!9e7Xa>MQbzdK>*6~UlH?a@`PKYDpF2%#>pVqyQ&X0Of7mV=&cT!DnUfMl#Znw5_;wJ{tJTt&a zrMPI$f3%Jy2_PFgpWaNpqN)7{CYPiQo+#)E{lAk3BJeDolJOse>q{(2NlDo(cXUDm z%d=!tWbEQKR5G@xsKomr*dZ}dkQj@EpY-6+!cMO!a%t8_@z2WxjvRb)a+a;!>8@K3 zcRH>RkeIlV$;qpEi-=u-lLVV~YwPdR5O9>jG_at!d8tGM5Cum@#vKN}_lRr$yF|SS*UPr2?!{kU z%2jAb4r2pWD%YeUoHBb-?L8IPsP_mNd@sh=!~unJ2ihXoOKoul-Ocp#ky-j*2WJWx zeu#93Ua02~?yC?^tgY1`$G+%G>dYS!(DX%c^m-#>)`pzf&;*kI&6&~pqqLb0mVdxK z1S%LB49Zi>Q|17S55WeFCKo8uE5!;N1WFLh&}_tV|2{Q?2l%AmK_l*ekL%20?**Jx zORmPSUv4J1!0LpAW6*BT-A}#Ke8jkNIkoZqplL#tAeJPYviHFyC-XJ)wl7%d>|cF4 zm_yob(`o4G^KZ5`BHp+g3lYJ~hF|gPN{Iy&vf3H{9UAlb8gw2!^Zx~OAz8)XwaaRv zOwj@78BOi+7^^pY|K?&UU037yU5#_AO)ttPMkwoJmt>mPjF0N8&GkW~8l-%0g$}To zZHY$XPZoon>BUV_HeJGLetJ((=J!f)(zmzuX}qK>^jDw-2ys5GN}qg$ubOMB!4pm3 z69SL8r=x-7juz?}LZQF?z1xHPeB$`$?5jRHdS+&3zr?eR+2J-e4WBFVs+ghJqS6~v6omR*<^rv1^pgM7_Zuf=qUB9{(65G7RG*pk~bOqk}LBxIQk z;-Q}8D8D24o|FSZp9I|EljXPY^7THgaPR82_;QQw`WiGWHAhE9OZg?|*)$E=XcV>lV}=6EWT$~P4Z&rXNf=HV}R@aa)GgAQUN`4W^QK9efx5Fj}@TVp6RU{%$jS(a<;Td22^c2$k}|fRJJm zr`X4#iV>%WL)Klz>qFn3c~&d6Nc0W?LK$Vi6KUMzG};}3poZ+*A`_CxXxRzhIDK2Iyu~06DTX?fB~poU9u^1cjExt&P%c9B$vB`37bD) ze2AR^^x?7kwN??VaEEVt>_C4Ltg17&-Ja`V1UPgA`nN~Xa=QxZf1~QI;Rnr5KUdnc;oJ%8zo~sKP(^!XLv~96qcp|U_G-FC~opzx;38>u%xs{ zG#=c-#_Pl&DvL&A>^}fGO`Hx`K@x%$Y&gr-)r$Je{KMm*Dt|+%yE80g#FOtxEfrk`ls5waJpgUVp0%wOGG8&tk3zz(rl@b4l<>4UCxe`D_wOt$()xKTaH} zf%}B|uhsEdja`pk`|ZXylO;iH3;MUO+hTJBJ{=+>xTnB=icbCD^VRzOOt-l#Z= zG3j>j%F*}yde3p2OyU-CncXOjhQ?>MaluJeMRFb(DQ5i(vzNh9U_qo!A4$o`bTCqT zGu*Ix7@bnJm!LhkJo=cx#_GIzv~akBO`8`6f^CvhR(!Jn;p^PIZMp2572BvYw=whU zCSCv@^-SpXLi0ebvV3;YV3yOHb(PG=dP#zwX%`egHU9%dLkB`A)B|D_U(CY9#xX1c)CPST=WGtmZ6M{CC@C z7m+W><&Dl_ud^Z8*|p?rjO}&O!034Vu3U~_=a;C)y6zWsqO>Jz>(GUCZ_Yo(B0 zDN`DGniF4!)N$_aws`?kxV56QosH;B#~(sA9Qq#ut>1~_0>X3r0!e$I+-`J$W7}hZ z`dIx~B9ex5;8MH>(8g$RtU3amYAU(5Hbb`Zx7p$m@OHqemLPL>P%)mXnlEp0KKCbqau7=sXPK4cq9ItnJF`hh8k5AxLiS|9$?O#%sVFUjCg zf)DtdI9o|mXv75Ae@VnLf*H0;3Id1;aZ2>e%vz4et6Zv9SIY*vSG$uYOM5Frc6ED@^`*Ct z+xd*~qI0nh^~rW_cgk08DAN)2>7H%({><;Qh%3L*xL;7Pkxgn~<&#uK)napp6HX&- zH1={}kEpI$)5y4qj9PvYzCO5AK@`d6tM9p1O6zYg0iRc!OxvuPk6=u5_Bz@pAG*ewhe|5M9k17lQr4W)%-u_7>E>sl z^c>@CLXTUBZ%eon@j#0#X2@VrxgYjzx6hRM&pR$w zpNrnBb3cys@!G4Xys0E~6uA3a_&~O>4sPW!7q1xN7qXlx>g%KJv!A51TD5Jh5!V(~ zFMM|3c&yn!rT2ziuKavGvP;x~TpVwq{&0ZQ*5SDEHplPgSf#;-MGjdt$$GATc=C;f z@`qL|LFmZ9=H{yEr{8=nw;lBD``k^v!uIv!A|EXa&sh;@`0R!54!76mDC@nHgU?nj zzzRv}2tTNXMUy|B?|!FqW^+cu;?{U_{b0fvP7BULWrvu;4E*8Q1jZrXy$^I!Zwf8?6-{)A9%sv~ zyuDSLetcryu0uro9G``mb;8YoUq(!U`OaK^9R{!S z>q8`TPt)!*7Kh(c{4Dv+V=#I9QT^qx(&6XuIig?ALEozs&>g!_A+no989-J27k*k_WB-SddJwkv4i8(DA}sji?!`A#b8`} z;sbui?e*f9=_NUu>krgrHTQbd=CY+zzb*d|cAHF@?<8Xv^`n0JSplCqI5Sd)TEEg* zoSBg3sesyK2S=g2XyuW)DJ7@XROg&|+*@OxV9lpSYa^r`@q`KN#}*z=-%3n62LZJq z{~RZ$)@=U(EdI+IVmi)l-8!B%byg?$1xnw?2IuW+bDUc&RD~?d{3oqH`-`R%?;Fz* z8*R%50r`MlretWiz+Y{*Oj5S~QikAS;JsNN_?{^>l~3(`!RbuXV|sm3UpDSimw&iP zMxP-3A{Q+986bX zKG+kDO7J#ucVC}2A_?UEbY9wogKg}>cRs#j#%jhZ701Fp^9M#J&s37jtO!1xdMBb+=;^#<;XuurAKC#2qgBoE7tiE} zo(c%z8jB?hQ3x<5y_hplLxrieW1Hl9!_~_A0t}A^X6e$#F`TaQnvzJsPXh!T778}{ z`E`V(L&|1vM*H2rExUT1s0>iQ@B28ikQap+J75_ipU$8sr1>BW<1=MSr@Q%#>Fo0& znopGyAMN!;UZfZ&c@SdcRy!z-XbU>P2p+V(_o#Bv6+*N}p&Gh=nq^9nduq^$aarVL z&HhOoV3XJxZ6&$vq^sX8Nu!{N`waFd|1=T#!Ehc|7Segb=NN?ino)=$O&-DXXpv3c z;G7664OA`+!S`0%)#4|ub?7c&;X{hXD1WGNNi?%WJRU6p)F?dp+BHlWIY}{9R7C4$ zSuZnS2d3(1yg-%@m$TydU$({#w5M&HKI81iZX;8IrFzo$@q_-qpnM;ylpdqX1 zMn4)8cObSp!O_?IWcDPYp^I){L0=m*Rf&VVz?b1sVrg?z3B~a2G&{G zHvA7GvyXlcf5ge@HnPHt(MOcef`a{DsgZrMDDN+w$?@|x5As{$H`MV}aPyAYYP~=1 z#UOJlXY}1sL_$pa4BSfben&C>OlY86kHVizyho2*%_wJ%> zu%12WY{n|yCqDTmOM53IN-XT6x6F#5yQO^#(v#%go6f}8flE&`7%$3~%+Me6tOPXQ z#INu>L!#s|FWO_43f=wIHzW;e*>ENS8DcazE;B&tFtqA@BtAy`9{Bu+L|(|O|H}`g z@K_=Z&c2&z7a_t2A6dHV{(ym#XaBnavWF1T7OT-jsZ<-#+VHY;r3tHc<55mV^Mo3V z>t=d*8ricWOHWkt2Y)L%u`{Kro$rL@?1O-$S1f)onF1H6jz{9s)e?N4waPo_iYmf$tx@R}H&KspUDu3x=({r+v}g*Y-ZbkGbR) zUS``HQV1Ti5lfJnQ z?m(De2k(3UW$wt$Px5KqNlX1|6mjW`F6K*b)ZB`!F82i)?;*ACb5Gn#-CmTlK%00_ ztfzq|q~o}ZpvjN6cqB>20L|^0+C>fr$lxwbnAVjwfF*aB%ultAMv~K=-I-& zn2C54`N#3d6z-1W9|a4!-+V+YI`>e5)VM{`oz(P(cq_vW-T}VgL!$E5hre%fJihru z(dHvTkwSVrqYZ(Pl2_-Ay-Q(K7c!$r^^}aGmF&Az38hJk*Ju2*pYuC95~O{>o^t{3=uOECf2hpc{k|7K6@(Wx zkKU0W%g)o*OQP<&t*ipd4%M#s`aJ1QHHH31ZbkiEI&I)r{t?-vhoX_Eq3yX`4s?^Z zaqo|mRkA16ZXZY;7q0@nv~2-xZz}Xk@W&XjQ8BFrmS{`F8%j(13fws3CZ5luh@VAs8t#3y%#UuRe&A`r z{bcxM);9f%d_it8e<%H&bECK%b$^FjTk0Hkhi1ojSLwG}D6eANoa#*$_nXC}7*ds+ z!1tKafDOpex(1H-$uOfU_p1mpNoqN$VF0ghbvPrQ?F>`ZIQFig^q>l z9-DO{30Eu|3y@VGGN~4)wvDo>l$(foq~6Ek#)?i9fMh}B)phfF^V5bNtsf385A_cD z=FfKCu{p4LDAaA^Ve`@kfSO97O=j=h==STZv_Yd%x-%1S$qcDqI*m{_Q2E`!qA#e z7iAKso&&czcM-R=zOi0rQLcWBzLK8dH%7gtT&a2mtB_%WqtG8WIB8^Fg^fm!J{ukT z%3#otFIHBTv-O2vd$ushC{Qo#+k8PnZiUHWX!}PF!Xwx>^W>&ib2Y{KD|DqzOMeTUhCkdpI4Jj zQ8)bcUKG6`iQannv%RJr z)xO-0(6pqIrpiR&Ll||Vi^8~OtA?u3(KhW*yTyvdvEqg4B%j?x+yueaInH;TD~5Bs z7aHX#~`OJ$IL3}`?8g{71Ph@pNNQK<%cgmvPiMmvMjPVv&t)jUOZ6w z^o1`M{bDxyRm}Xas`iC9tk%VEI#S>9O~7rQ_KB4-Z_Vq35d((`|L|U+_Kdg(ql%)6fMh^?*kjll zI8xc-_GI4KHX^v1xgvDr@`>{e3IX}b`Jx4j!;8cHJIg0U=Y0?8cnnI{?tZ>WMznCd zBU+Z$`We_tpChSG1TLzMPn+cW>9(@`$5XgjWj9>DJlYiGlsv_K`OTy_#?Z}3Hfa&I z-%WQwca$D$S~^8uci~Ly%rmBly*K7vf;P%R20iWPEVmu z;PAZhJcKDA=&L|H{~hWBSD(p%HI;+7My3YDxXT1$+5d6j&Wi-wS89Xk(#=xwpvu#t zgTf-DUT-Ar+6lL z_{pCDrwbe0I-1LEQMQy_)7|vP#kkqZt6}`UkOq=z3b98z@_Jql3-&Ypaohpi={$!# z>J-+L_Dn$_wmns}z=4HRv0icHw`TqQ;$NaUqOj_t>S}K%Zwz+c@6|=;VC@&*SP-K# z@Tv1xhdeQA29gft4hz^v`Q4qoD^#rRfi8LBAz<4rSLA`1Qg1P|!#L5TtyHj7*8aTD zt4gw36ecQf5(vPWo`8PsV+8I9ELnW6|Lpm;(QH7@k86EMp<^$$xqC2m+c(_H$6^=h zf%|qm+HDqH7KP3zhc#@yyXYZ5jY)y}h(p~bW}Hq(kI)6K4O=UiBZSGe#FQh}XvA~A z3_GoUEVFIa)ZYl+Z(SgfrDSE4+lawFkYA9Y3(7e9je~4*LIOcrf?Bt-P}gg@?5J^1 z{L3ueFQ0||i_8mz`TM3ddP~a2OWFP7@~{&Dlz@7;NQ6^FX+(d_ySOP(_0s5^b1N1# z7%IPXIeyi?)4MJ|ec32VEi3lh;J44l^w=@?cCkF~`G6p!%;mZDX$MN{LzccAIv@wU zp8hfYVY=zbgX8K0=pdqI`*Jo5kgugl`uf>e&y(P1_W>G2b7rD=OAqm$_~J41r@_kY ziPA5$o+{1B?{40fWA(vfUL^jAB4mCv2pnE4rN^VYNx~{NMcyJNb`J*qV83$Y*qZt5 z1t)Q^5BVo7e<-+XQ{yVN`N}bb4LiGu#8cZK1908^_~bf?m9TxSW~Zr%$9;W%6AwSa z5s&El4FCE=cm2V`BgiKBk1IqO*@XXbj&J_QmrQc^PVw**@YJ3w>b=9?oW7BLf0V9e zH*Skxv0j>8Pf9u7K=GOO2xkoob?*2RZl6~}Lr)k&Lsiv%eSKAt`><|ae_#8$R?|Au z%hjgq)0LhyVPx8k<3(xv1zWYx*h=j?3V8TLe?RP~GB>N>25uAFWDdsr`@tYMm`a@B z?^mzy3@#)f%8*&k6a2?`*UumTOcDNfqh*raaoQ8<$()3xxi}IkmMs|g5w6}9S_S#U;1)X2riBHkX(A+|=d;gT` z=40(bMLy8>K7uW<{ot|F)Oj%_lMJt!#Tx%GN6*JYPNf#EF$)%{kN=PyeQ&^W7(s}v zHAt4IF4e`Xz6l)o6s((3zbrxf_o;E^!)JIQZoS(k0h_r0PFZxg$e_;O`fx{zgPgL_ ziI?^7rfkt<&Gr=gwQS|(O7pDWuwPn?h5%4;vN0h3_mS_^76b$C1o5Pd2h9*=xc+KL z^nCHR`C^W}xi-A!`s*I19|H;2o$pR|{pH*(`Ok-vQ>%V{GEctM8Y3CtBJZX}0C!Y?w-3by<4V3$}so=5zChij^@QRsejzr2Ey;(>udkD z&2Gj|0k=GrUrQ+CU_5=fs_)z(MOVX4s_9LrrXa&W zWC44_K-#*S8rHg?%pK$K&$3dTo(LM4+;QJ#cm^eLFF+dW8O8ASeWfqkTm8Tz03Fy z*J-4BE#mg+;B=1XWG>2=%|7$djA#H=^^#)?3gt1B9gkMz1Zsg<=-a-z8%nO*gsK`x zP8*1=B_1$cd@(yNCA}%>-3<-aJ+o_VEy{+mR^&EH2I)l(XC3u2QqrgnWhcNj`+u%u zpQScC4QRw+tL8Up#wlg($=|>Fa}nnX#LFw+RZD#m2AmuD1*zN}p_BAocR8SMtXw_L zf6+a1VQqVmiI1M9CMUYZrku5FdU6KSF^Fx z%IdMc$Bz2UwaG+B$ipZbpvQ08=zz$Q0=?wVKr%ZHXnDl7HUebtFV|Qq7Ro!OA-Wm zfv8U0Jqz#Npi*bgY)kb^gLX1FZl{FVooy=V+Kzq#hu+?PX`#zqSkAW}cg_WVl+Ih1 zxGa1o3db+{pkm+stDB~`{(Nt-Mr*VAF0em=H*TbF@S1ik%X(S5@`gOLAv<}mZqUKNGFK^`Uh-HMt$zLisjh1oApZ_kE z*?l)IuPf>_qHTPXvJl;wj}et>D>__qlkpf{rRQ(j|;GMoRhAARNhWt-;A| z%_rtFe#OOl<*)RQHfMOYkyi;N#?_NLF^s~dc9(MMmA2j7zB5K@H9!o1lV2eF)j52` zRSUuq(|F`ZQ}nu`aZ~i_bmD3?M96keuL1Xkaa%ooq-ePf1<@&bb7P_=u%U4j$6#Tg zbN+jGIoVKJJ!WHc>DqmSoJJnfO1P`OKRY^I&^0N1b|P)FkY|yv-(8a^?#76T(?}EQ z8YvoH#+Eljs5k-Res507<# zPt(wuF6#W#RNhZZ2kJajGz?#1bY$l z5AI@$K-DC3g8A1+8w&698MW~#+O17s77ya*9fWb z(uY`qZa27!x-nJoH=V|mq~h1@h8FZEm9vF9$hWc>bJO!>P>Wi^IXuHlvR<+9N08Ef zhR>mBr7|I(REb6yfbO9OfeN4?ZaDVpsc6D?<|N`z8CGwb(LDIPb@y5ZEt9||0NJbT z#Id(y*y1r?`i0d4b`Fct7OvP2@E)E&6i_(05et*&b=KlfW#LkPSxLmTRM$A2=NEoj zdD!z}kXZ|YF~+nTX1;zy2q>pHA4kZ7BXfsjoTWkDz1Ee#*Gw%q$qIQNPu~h;Z@8T1 zw{Qbl>=is=n@xFW6{;rf9?-ReTHt4vyfv09~xv62=^ zFN};)RNc_7z?H***Sk2Iz!D)}ZXH%q7RStuC6pS)YG@<6b?oPAK zUq*^_yDy8s{E)W}B{wr)cR~JUlJ?P7)5C zVw{F*-`=8^NS9spZ73%e=QikKPcEL~o1P6a^gG_-6m`~1E#c5+=XeKN>b#4BAksCU za%z#(T()Q8D59uHY98(g3Wmx+F8kPIq(TsVQvFynmu|s;Zt^=0%VZ&CGJ1)#L-Z?; z%O5u;5N=td1?O6k)O;T5Hu9S3=THacSLNQymqF z>J+M#>h~OeW6oDkt(5m8Cwsul+yp)a7}vP4f-epczY#E26(4_~Xt0E3i)90HMt!wE zD|LUhFU{FM>pqw9xaofF*@vH*BFO7EUkFrnZZ1S=PoZfnQO?@$BxuKQrF^k_PZ1rr zoD)vjy#zv{fD@If5i4=s_4?R+lQ=ZHMNXs8PDiUy;DQD!o-43Zcyw+A%G6}HVG}PP zcm6UHx0A>_sfV(A{$XmJV`gqbcjKHqgO~Hl`!en2CAR!4%J~(wGC54;?9&A zO(w@L&+C1^Zd)29PPrnmk2LEq67tDydu9}Mo+=Akz@k6FJ3Exmx?WLr`nlRVO9t*H zRiQ5Pre~}i3D+CHHLVrfNl3E)z)AiPdS}dEW_1ikRV7HO85AWpm-W6Is%f6uEAuiQ zd)B_iz?((|wN!b;MW>Rg5vhx4@rdZF@9^3{{YCjhf*Ki-q(t3leEPw8B~ z>Ex&AeRW4nfv=Rr2KMa`=)jO~;Nn@+ho1J|p{T5>T>@j&z_;9|RRwN7t}Aw8KdIcdN8c86RDsHl zJfD=EF{Tq~QppSuS0QqlZIp-X!QktE9S!m9h4`jf;}M_JR>sfYRztCH+O7^*gP<}cm-$*ANMXs`iO zL-Cfli?7?s@sQ)XpCHq=deJ6;$p1^cn_n|}q zvlG)k-hDP!SA)5?CK6ZTH&Gs#!&7x(XjN6`*i7?AdEnJn;-;GC#7Rm<^892>n@?b* zIl@~DlZ8ObvNE~6Giu+Yby!K0VvgenvXz%?J;4CnlI-`qlDTO&q$29pEkvQuBZjA~ z!eQ$&%78(T*XhB9qE~a?#Tn)Jyz{SKZO+2A0UhqAA0xXyI zWeB@^N>8(X`b83pvWKMCPQ1WP{UROZSs#X)LL*(NHuY}hs~WAW-BA-{P*`C3grr;3 zVcEq=t%}zA3Cf;hm}_%}w7R@JfyeEjD6qVFQh&>Ne1uqaZwbZsMdj+=Yra0datj)} z*OfM;vvm)|Xte-?lFd~~Jido&9kdr)4279}S<6Y+1po#?uNnVJy=aIXSA01%@4MWc zy3dDgTD;#R&-G_Jrb0ctGr46j0J6T6w5)D0j-V6aHs$60UbVAbJTY9T1xv}ll`47d zDFlK-5fIj5#aw=xpO@ABMup{c+CvM}&&h6!O+=zk)P(~?9xf6YWn`;=wS%38U|^QX zaC1V{(=Yf8KodlSEycy0-lqyf&fpcIiVV+TLMd#S_l|aJku&b6*q`Cxdt!qrsdcCAMkVtp*G|_L&9ZtcrW3MAZ5$qy&zk!4Tps;_dSp zz>BhwY}XHdsJx3Wd9DTRiKyF6!;%?Q8qzz(K+X{#SdS?@jXS@XAIzA}LTTqe0x+H# zBue+{E8V-wGdTKSWLp_+#b{SIFHH#YpQImdF8V?{s3@#zD?1+p|7u$ zoC_geij@ihTA!_AcQvxND+9MPj<=Ehm1y(TT`Awoj3h4INxK%w2{(W&0)3rCP1Im& zfIy+%K2|DfbXb;nhhwHqG?s+%sk~6mkCBhjBI?HLygn zK;!kn(n46ndYO}NdL3*CH7&Z8)I596`{~{XTQTxe$BzsisUk*0^g;X;`GkUW?jDsH zQ{nB7mRwKgmZ)8Zq(TmJBMb3!zFXNCUfHf2107mk5DL}D5f_SlT%yi?%RwFYB-p#u zTG<y0lp z;WZUXTL+{ddacq5t&(Ae4yfc8F8~KD*fe4ZC=X|!@|l(g=H4D+!Tz*M+##{;pMZ(A zRN;C!r1J!v6F2u>gk;uG^27?H6b1dn`{}*OVfIxwCiP$vS6ouG0 z+YNlr`H8EvJJI+oVerkcn0vH$covWro!`+rj5R1TW&b0!Bl6QM*Y{#^h4FkXK_v}t z<_lW4pM-CC<4#i=ciW3sNUxs{q;qkaY0P)_1aKrXYK1Xgdt7R37&wK-{9PaM3$L~5 z3qE7z+H!-3Z^AESN9yx?H@U$bFd?rsfuX(DvwZu0#u}!k=HFe_FsOIFHM+!zYLJQV z!>tFYBwKE_nfKBCfTS`ATtIZ+(~%>QIP2ikTQ?u9YnH-Ssv<1WYyzp+`r%+;HSSXW zIH52zXefOHvL)s9t|xh*E+QkS-rx=UpVd(`X`8U;j^m(OOZ8gb>}gWnJcL9Y(YM`c zza8j^HY_$Xss~T@5<|Uyp)cf9#WVfYgVshduI00NWqL&*M2UGtJHIq;BXoZTRQLkh7J_QMM$@LPsKj`1(x zpASami&4l}Kxe7qV;EX33SP1e6N6oN!*r)Muwvvsj+SasU?i+&Gn@x`epX(bA$EaX zJ8SU^!5ID7vfO#WTnBnHofcK784EeHsFu$hD*me3;ik!skEI-Pw$0)rp#{zXX&KvmXNTr2HVc~8S(pl)jvcHHUhNh9uB*}uzZN= z6y>9>Y2T*aDX+8Qs{~e+pX6}|@}IDEa_RF0`#z4u3^)F0h%v7ai9Ir>wT{8MpAzjZ zM)eIYS%|rfvlQtRCW)NlQRv>|RO9vL5qVm!W^=wdc&TrF5Eyb`SV;JBkWl%{?n6om z$1HQfpBn`53`Vuj(x$#==Hqm(Ml=LrI*Jc+Ygt@sp=a@MZH&nGFZ&#lKSv0#>mD~C zE)sKI^rfyD25eU+{ljXm@>T9pGM>DQpbo=Uj zcTh}wYM1Eczss=xTZ06Yf2N+}vmbE5$K+1dV@`IPt?owN0XNau{(08`^B?YrNAOlv zA!1e(HEH}uCDZ>h>;3V}DGly>vxHZ=cyG}i_j*VE2Mo%8%80t*`n<()J=bGUS!nt?U#)Omi7p?c4PUqa)ykm-vj1+G?@-m=#phhzk*ab%jI2&XYRhqH zOCUiCssZ-H;5|AhAwW>du@+fI98a+7LH599Z#M4uUq)fRBN`HD@2dE-tpFtz&-=U8AycK%v%it`OdLt}|<6uR`dY}*$;T^HY{@!kuQ;Xg?z)k{5&yHHe|*0`dC z`TNcSq46ye6u2w-R@vF!r7V>us&5cog7PAWXayrX! zSkW@&_Y3mN^lPqYdYZ>yWAXU(pGqd&8<_V7O4TIRlRK*=EPP;Dl!ML{AY+DPbSWD@FU zsWtw8iRB+7@$d`3I=rH}zWQ;IfJ-+&zDBDmi9mVp*RJsL!Cw<5W{w*-C4S}c1OH%J z4Bl5w6j2j7Zfu_RSn6}7AFlze9*VZj{AZlM$?UFv>m9ZGHJX`-B&6xuJrh*lTPZ&YbBw9%Tf_pYNu<}Bm+ zO28XkSh}c_YIA@fM3G7t-K&l?CjX`$Mtxi>W>oNo3v_hIng0^ka7~o)8k%3PN#s*8 zK-UXa2pKj%^4R^MEa|bPsv)8U{Vys--DEr1kns;Xub9fj9l7@go*WtuT{QXr`~%aU ztXgNgMl4@V^No#U3Hz3vOZi3PwBjiZ_}|{Wyxvo_VMAGL@gN z7+sPee|fGnS>wuzlk)l=PBE}{-D%&03&^+`Ix=xYpJ}dK${OBd(_r3>_Qp_9c0c#j zq@P8ro?ecaWCor(f>tk%GdPwUJ!7gerT9ds!Yhq34 zp*%HfzU}&bz+m>Av)OFCCRA+Q4wWui+_<~p?4eTUQ3*WrIhhQ|w7(MVa2;Oce_hpi zWOOAn;Ez3_v51GS_Mv<&z}8s3Z`!YdvZOTAr@_dR+bhY4`sQnbg>52Ax1e*?BIxlo z-i$cRJ?-ws8Z?*VgpR9Yu*$uxQIA#|vV0-MFAdHHiQdIxs?DJw+rcUJd4L-HGq)Z;rwS#L_%TT4 ztWGtBw`hqc2~C`N&cpdKNi{N=i-qpKRM0_fiCLZb5pG6c!qXPAU>n(0!sWR%D}42G z>%8t79w89gj{D$o&?#twbSU~E6nL&tCu?socx*gYHpOfr?z+#wQY2YMT$f@hDL(!2}SXRJXUB^z0YeXLE5Coeq)8B2 z9~zgl5X$m4J6^3qZu$iMm5ipadGF5>;3!wUJN2BkNH@O*pA+b|+4)K*-{ZkP``4t~ zOfq_r=>pvyWj3XGf)*at=R3o3d#6w{+oNrG0EUW9^~rV9I4RWv#}asUoEqkT^eHIh zyvU8|q)tm-6110h&Au>ynas_O{Oq&5XFJj48)wtNS4{rWwAS6c(grylbkaQLLmouV zB&5M_R__odo(}UqJ#egB?2A0=ZXmAl*_yIy#!Oj0DAXLqRRoRl@V3_P{2!2}#0`t_eK zQd$gXi?az{qBNL$QGV%CZxfUFbPXz3diYP5(Of2DZt-7K;?&Z}e8=_af(M ziNn3hh`JYQf21kk-(&y8I{bFxlUHv*UogT=znhc6{5u4iOY6>A4cU zIRiJL6L;p|(P%L|Yx3QeLnW|Aa878Mn<5G*r#Web?c=lj4iV+RD|8oy_}O0AHk)WUn#^AD{lC#6ya%u*EPFIYskikyTq z!VhU5DFiu7N6224Ii`%KK579obCbsE|MsT>WZ;Q;T?JpuF`DUMNt%&fvAr{G}|3 z3)pM1g(I_+ff1>)L~yL^_{7~!Yt@)B;GF6PN7Raoo}xf z_Ux1;i+QdY%ca8#S~+_22E`8RHw34;XeMrXc=_yEfVpBgolvl8qdbqzsS4++@OtZZCGM&Nab+*G@N$-uMUgJH6 zp2nirC;VL6Ro`#Z35^p|Z;MXY`cXPqzLpn1JsGriUbm}E%E-mf$9UMJW}NJGia{9k zYu&A=2430-PgeC!fxsB61jpfmisR18WuDCnuP0mcN)a%-+>4E0-X;xintFe$WwO*= zLl4q)PltMB$WPZQQ~2m>UY4GYx&#*>NHxf@(mEV5ncPN|wqjYRshU__Ekb>gav~3r zJL4sq)@ms%X}7OHRgb2?sw+LeF=ObSO!obsm0vT6_h|Z@Es#P5eiK>y%*~o|kVXL{#T4ZESz*Lanlp*N|I9Oo)ct~G-(DcGEeHZx z?SlN=S|x&3l~MN!ogOV_*_K9?o!!Nq7Rj^J)28c>>2gi5-U(khZ zuG#kakix{^PZiwd7vh*?29EK3Bdz3bDG2=_kyX(j@1lQ$qVrxthUECl7m|S&&9pX% z^U~b6A%gSwua5hUa*(&-P|=SA zdCH!HmT|gd>3H#Wa-+`fk7uhbhBz;!%WrF!Vh3$xy(*}K)N{mQt;hEE5js5$F z9$%F>s*!w5?-acFIeLAJYu{Cv`EHI2syUPugVijz9`|m(<+-s(^WEqTor0t5ksspb`VxcOB@PQcqzMbzPjye&Wk#;N{Ek#QGRRKP>4mQ_BQe5RFS>sqb zt%%!YK*Pmq2;rvl{Id_6#h1ozLP)PpXbQ4N`vo}#HTP@nZ~CQ9Xvd>$YK9u|exh(u zbwI?V<6w!r{mtokDS~Fl6A1I#1GkTH9 z?|zV8yG0BzR|z2OUJZHO@qlai%LUH~&7G&`GKU+#Dg|7KhTd~=LGJcZk%%^|y9Kgx#D)6%SB@Z|p* z&BdQHpCc}?o1qlGw=%LpxJmDt(IF1VI%0Pb=YR2I-()N?;Xzb+ zP-~e}xb=FfQ!fE1FOWnS*~@sc8D(DM>JVlFe+`DZy^2V4m;j!Z zgyaN(_D^fpze|#{N!l*0y&7u*4HxWKC}rvr2nx+Y7Y(^b)MIF zTIMOau3gendDRkxa=eyO=GFD9DX7WcC7GS$4C@z3g9e0o(ET2=gxT>*Dn4aMKfUjKoocoI9Khn1)eUrCgd#FU@CtbI9fgvE){c|coU|* z+?7HVsFO{cTeZ|#mL`}Q@RJ6X9pXAX2hkM?&Y$>X0nLFn_(quBBHzJb5VJ*Zgzv#_ zTwRml`-5?$7%<@MdPig(!!|QlF+&^S4{GP`xN5-Rp)Q|Vl z{}V%dfhXIv9f34h17=Mqm=1*Sb&TfZND*mt--VzS?~{K#r3t*3aarftF>PzRIH~E= z%z1?i+4dgKpoXog3a3{&47|h)D!_>|UK&2(V(+3B*7_{%vNPPY)m||`%oTb9{PWlhvHljeFvXcpnom8c zQphGwYuXntV}_XSam$}Dn&_3$Oa3->?JJ@Q+x|xt6UJ(qHKT5aE}uRq;iAuJK#lR|+Q&JD@4Y7gl7_{zr!jlENqe~TuH7~z%Z8R!SI zbJm2)nbD0P(S4fwmCs3~XH(4m`O>Gqc;4YNX?*Tgxfjd8SYaeBW##P^m3mD+PD|A| zEA(VK+}y2bn)T-Ah2aIFr+PIpX*vgpxVL%EZl^|A8`KQ{S`3<{b?KZ zrlGnh(r4l1#gPu+3t>e`>Q9vJj{(9VfL+Q&38Z9W05+t}zTCFrXt zj7T8X^l_PBhxXb`NP5h=Mua%F$D3!xa73wuY3n=90#_8eIeRf;H6b*m^xqzd8%Zq& zrCCOxL`6OMPe?+ij|Md!^GPv5uE`cu3D~C%quuDq!*uq1MP_mdpjevGWHSYvN2l^T zOkR!Wfe_Uv-+<<(`$DU5;688(gXfi+9KomR9OfI zw1}xLrTj@eOl6~IUD#*Hefl#mNJkn}OJUgKEQAvAFZMdo5u;Kk%$BOCrI2mh4)Mo* zZzZj;NwhZ6qL=2DQD5fXmQoVC-QAEbCJrbeI9_YEv;F^ySo!eVR-jM+*4grw6~Pp+5??+mx$ zBL?ZEU#ZfVYc{+m?0`<@c;S|qzZx9owdWA%mu{zJTJI9hV%BJrxn|ITK-yX97whPI z{r-IYj~$PL*dvqU+hevM-;#MSB{?gUGuy>XqvGZk(hOwim3g0`#h;Y zjW^C!jW<&Fv_k{iR1L3a8bMxNR+Stm*N1M&^9`SbUO~&?NbAB zgxPZCNfxzicnW?Pv08%|a1*j~&`Lnp<}$qT7;07MEq8LRYa$?pY;1O=@7a+EG=|yT zbiCu+om^PIo}o@BGMM{wq3yOVk3GlvSWA)}D>+vDx{CCAv#qThCM?W(<;Zy#%^V5r zFlgSxZ{Eu-_F7Bto7L-(;o6>AIdu81y6QLdsS%b-<>~xgC8Ef2(8}%9M!Yv@C|4_b z*vjXDhTW?h@PNJFYH6~vybYQW7*=bo4t?q*RR5-D)yhL|r;KQP!$FGnkku-P z+N!uX9`+n8l$Z3BJaHR7mg-R_^ynLSc!l$uo(ianwPRU_Y=}7IP?eF$<@X_BUz^c6 z7OqNd1en2|8tv5wuf@5$1wpuj=FvK^$lQj(<^W=eD?kY|wej2z)>obfvBzrbUF8IX zoS9?kNXE!bn==CmFTO0=WM85@@|q8 zSaRksTTbuz6TOl7iMrBLy1|zrMHe0EG$*|(FTxLpI4r-;fF}w~=3l3JuhCz##ay}Z$teOa1}Wz`2#`g9 z;J&{SfD}roersKGf4;)e^WLY=t2*s^roINv^mL3`0Udh>lm&$JTMr_g-}B$y67grj!NW1ytDpZ00{x5Kr0|HEdG3CM z0DoclUt25xMoDg7W4GeZVAg*%$G>lT`#LyS_fd0g2Hih}ac8{-q^ECs$A0?fm~B?q zW4`^p;NkR7Vg9cbq#rIQ{Ezc?y+k;A*JF- zBqCi-JXuXk-r=@WM&ev4+_>kVOvw`lshPj1oloJEO?K5x`ZoZr(7B=-8x z&C7EIcU2uiHPj$%)ruSyR}bj=iJ%(Cz>-hF!Fjg^RGR~e{3%?F;)=%oXDweCzO7#>z8Nh^7({1qKbqw zPQm4#Y6eSAhP1b5P)>|y?7a5xN>1Lt_7X zc7uEEUc!X(*`8iWgHIDp>n*QNsQ+V2eQkK~WndB5ZDTg;X#15g(S;qixS^XHhtcVu zn;QOfHOnwfF5Ns4MUI499Pm&7k&(Qj8bHb)Zoj%X(_58oG$K>lC|i8Vu92j@!kb1r z^21rcD>-fW>ZUOh?7_Nl9z-Q8;Q7mBuE&y|t4Y_L@(H^wZhcLsks|)><4wkq64wT= zPa=oRpP!UGF)K?7nEhYuz4tp@efI{M5JE&m2tlMOL6E31YC?1-(K|`>9tP1v3WDg} zkVF~1*HKgTIv9-J#~8hjnK@g^o9BJL?;mi^b)6rQYrBTopS}0m>vOMr-D@o(=7XdT zpD8=5A(7u&lIM3G_?n(fP+yj6+5M=S5}AR;f*-BwHZ&zl7I8K3{y{k z|LuxsReM)VsM|9rVJoBBaZ-7LkdtCpf|g$EhCQy@5hAmxysVkFOF?2vEOsAE`H7UNf0> zA?sZSse{2O=#s^jeR~xz=v1K<+3zP_L&c^RJs=g{?aO0F`%6l84Nh`cp^NtxsXA_a zuzsczy&lO17tqx-+Y?%UP8d6A9=z?%*6T407I}ELr1om#p;DFo=!KeOi{JTwXIjO` zW(jc1(Yg?b@v_b@SLF;PIo!+N^1!-@T+FDWbAI=DVoA7&PNy!e*vs$c!?J|fz>#-t z-$N}QoO%Z%S@S!pGVhGNp3?>J!-^3eUgZC^WC2+;pXuC5j#W^>FD>ZQA=&ntJ{~6` zk}y18H+-A%m2XFaH24w5NEhYw~PuRD^`2CbMC(FCyOfir4$RGAUjU3 zBG-{Z0)tYJbwg3FryEI7BHnw!RKLMR?GYZ;sX5Cm+yvfa?Bo=4nUCxMD%OX1QRZLa z;R1&Vr7kN=r5}8cbm?!!nrdXJ&o?RS@ENi%c0*%)$Tr(E!0wS_o~|e?_Ua2EqlRDx zjZ8(pH}4o+3ggG@>t0NMs@3f#XEpV>=d%gy;iRQ=BvD_Cc?6f5+=)?R5kX~ta&eLv$SkZGG%a^2OciteQKef3 zp{!kO^jd(&#|y%Ul`RLgdnZOpd2Nw8YoXlB8Z|F7l~Uy0Q^E==^O0>I8f91Tnc#8P zj^wSi&?fB+OKJySY?D;36DHfCY-;U|fa_twtsU&dkDdAMI-X=4M|Gy~-WNgx4lAY( zy{E>|ZMfp!s@Sk6-BW??Z=aCFt)w$`d)nS~+%wME3LChe`}+8WInNsa`0S21Tjuv3 zQbj<2d4}NE&GRajo9o|7y?YIg{j>{P_k6N;)n%hKc!Tl?bD1ly$QhO9nHy8TKe{zN zBt~UHrE{LMW~xBNsd1gpM08SryWxuP{a}IYM$K)iJ(h7(;m^T=-FY8$yL9@l=~eTM zTYN8zyN?2@&~IbhPZv8qYACgC623P@-%#_WJi)nevU1B%poaJemglP$%@%T0JLl1s z%C1?n(;Y~@1UYMI-cg+47xSHp5Va}^C&NZdtI#5^dmx_)+0O85RDBtInc&)+6`G)g z@Y)j(ZA1!-$DbyW}@u5sq=Zzx&+kS^XDrbEEZ5<)8AUFnR)Mf>K)#jQ-v}!^psszyMfi= z{Qx@oR&)IG+)}9>=is*&oJG%1fWJI}si(O}%6;Xa4je+}=?Ypy%V!8WUC`=Av)K{RFq8dsCt~nK{YH2Gbf6mTAkA1& zKxSKcOVHV~O9?OVWtU^-y~49q6PHID0y*+IO6G);ECQIE)S(&{Dj=t?4e__MZ;zGn zS)aE-c8?2Kwg=mh1GxbcR5p!J{0K)5?DNM~hOQaUIseZmOMnXJ(B+1{n(w}wJ_%M2 zfM1+U4&TOv^<4+we+BY=dvq<`XXkONx3b63OPxhglh{Y;*NLPM&+>AIHG|I_g~e4) z{sU@_SX>ryb+^GX7hk*YfnNXi^|mGb!u&3elC_I@70ry=W6TeS=P}n>5+K-mF-$8>Pdh6${J18#e6!tA1phV&4 z7!k(8!viJqbSWFOA=#g_zf)yg=TP}kxb&#G`J-!c)P=lE#iTE1-8!#0mY*YarGnU_ z%8b2e2or8OhBf3z3}q8(+CP}rTI17$_MSTs7t_f{pCr;&(hYjc26@Y8FWniWo&9Rn zCWZ@77t^sFDxAJ8zdwq_ix%m_e6n0N=UgL9%hjjk@db_jh2>6_6QusUQ1 zjY|>=p$q&~U&qGYJ|Ppn@bo*LMwk&=%)!y|;W7R@_VRY3*OfgTJNn#ac?d2?_GH5y zkmrnf@46xkyeCw!SYKHzj4>9!)L({uHN8BE6uL*J&SS4vch|hC|IOAaS5tD5kc;)w z$~7XXolXRK4oPUY(jb0kk3^Ta*x`{V^C;hay3k~e@Q$8x-^kdhg|KJ8WQI6!T{KQX zDy>+lCZSOx_G43#`VNiC*C*(C9uJuNO0qcGLAo_b+vtDq+rO3M0mIj)l$y~!lUAW9 zMg*X^cujT3m}l#u7^LRC`uSCQXuY`F=s@?UHMgiFXT8M-Uekk%J{Xe8%b*vE97K_a zfbIpMO7AD(T<%o6h?Hal3IZDEgD0%(s1|w8BxkfaH1Yc%8m$(&C!jc3?BOsM*~!-f zppOUqqzbLU*E3e;rUS%nniZd&s4j(skOg^fVsDLf4HZTLwIBQUAX4MEPr*QSlUq88Of8 ziBZq6R&7DL#l>iPfH(cvElH+a4)AXv*8pSYCFWpIL)_9*L1SNyF#sIzL~5z{1CrhF0nh15fBs= zZF#{SwiMac%y|ZcN<}uq#t%+2K|zOdg4Id`J4$}U+a|X8Ks93AqG|Ql3vOlmRbAr4 zGHHk(l_sadRv01t*7(o(x9y|an4(I%-m07l7N@2|^%|w>ma|uRs8c3*Oz{IeIas4w z*TW$$2t{2^;Q?K(+Oa!9pLW8nYO? zhcr4~4i#2=CdbOnM?DY{Ypa<>2Hsbkel%m(?|(Vfc+jAcDE6U0`M{r~qoPgF9JPQA z$bYzJ+-Oq@of=J|S?S_)3bm9(pa0$(%cy2)G$o+{PnQx?L%p*NX>TaCYgq+OzNk0b zq8i)9QhhJ9QI}{IiCVnBcEEYXkNeheCbjjHD(Pe9NVtxe&ysWR!ez^uJx?A>RCAF! zc@g;W0F0KLJY`!DdK?tttyB*rjY`^@9@OEkMm1T#lpxSzyuNM_h{2geA73^UE>wDY_E$St=VG!R$q!E#v> zZyRtofm7zSsJ&Qwts<8g3C+=5uG!7n)A7jSk!zZq>NM4>22>x@zAc3((XDakSSAsGaM-~G&J=H3(B>A>>pKyLDa;N^x=0EV9AH8MWi z`N*YesaJ!#^9_6vrgp~HY1zo?W7?giiYAv4?nQ?hU1~NrF9M~0y<-}+6q;>1K;Jh` z_AJxj6E8tVRabn{U8i>#whc4_7SXqL*2CrYzsgU(<-D{y$=UT%ugP8ZkyZKlmED27 z1>O{L2SfYDvuh)jqGMjvCPLO)rb1@Xe6nz76y(WV?_udwFK8!}ygOTPz_mR*oh(luBzZj#tH>UKP zmApmW=Zfz`2l(R;>GQaSxlcQ=Sa7tb3C~7+*il<`y6qT1(e;4Il4GblO3=Gk3jP#5 z6HF@GLC{}?7prSs&MOen*G3u&+2wzM&KFlFDXv}Bl+V{Gh)2NGPw&X~VBf!+Ux^eY zhNG$-s`afv^!-gh`1zUqKskoPptR`H*=2t*r%woIJZZwc_4*H zQ)7At`kXkw*(}HFdv!x^0e1%A&NCya-AWhpUc=Ks9tLvr5tGDv%ZscUu&p36OBOgb zQ}zhHw`BPW43~6`?{pme)KK)gLzAoWE`PV+DDOxq2_Aa z@#GyBg=ytov`A?otPo9&@ ztf=vzC-NM@XlefB;?FNdK}>!-;l2Y%NO^ z7kjMys}tdQ{eag|g_n}NINzt2xU!)oR0$7*B*@Nvd`@x$6s9DoceDsoiV>-->3SvP z*Le!Hk{q|nq@Zm(#@ir8=qL8xU+d7O@QY3(Z|Y7JqZ5)=>+Oy*+L%Q)%=Z!9C_q+S zUzh|eUnU9F2e1k-v@Lwn=FJYP5G2K&jc0bt3&JFHV>K(~*N@b02EDJ^;RsK^amXe{ z+;nacA%yNWkjIe(Nu~)fX&=6p!uP#|s&D#P#l4HZ{>-^VnZ>gJakG*0Cg^ktILnnu zS@h}ptw~K-N{xTxHUO*!Gac)I zM!&bs!T|E1iUx@?@^pD)IP^vzQG6~7@x(DXX~q-)(S!bdjca?)^cF(N*&%wmE`m{I z{#=HQVbC6hMZ$9)&}lWZJE|D$_^d$L zExLf*w-c%5#ZGcjE5A>YxM8iH4Q%vp#k*(0lH3PW3sgW9x*g(O+%PO`?Q3uJ@qTr4 zbHT1=P=1x|-iM{jT8RgCZ9;RC9`%| zY|l-Lq*&uuwKq+AR;PD87%KQly=Yt*Bz*@zx;4E&?5b3wetiUzw696BBcsvzk=eI? zU6=IjM>y!2%f6k+TJgchywM%zfy8@`WyRe$iFW(|F6^)jL6;N((WFv@$<^T}Zp(W> zyv*)IkZ3&YPLVxLBkuGtU77J-s;21jBWX@tyRKM#)4_g2kC8N|S5H1-8QWpLzkgl8 zd__jT)FLD-N#M)Q7%8|sqU{^vHW#nw=6w7y2og))oa?yxunV!_cX=ObC!W$3bE@)a zU+pT6c^}i=e$iqi8>l*>Nmt&BfVX8z`^8Xu>!fh@#cgO0!p;&b1qfIC;TcZGs!-V4 ziTZ!$g^pj+=O=rBmvmNNkWg>gf-E{iw0=Q-xH3_@n2g)TO)om5++GZ(0NqHcyZ!Gk zzV1J;dY1$kPdd zQ7JZs_DXVe3mU=n{`#$-N5RhnE>YeiBM&0!c{-Yj4#+oO*w(*~NxgVj^MV+5%c{X| z8jo$4o9TuFyBG>{`e17>N6r3n+%6Q}wJdop=-d?-hOZ4e#m0k{{;v(u(+^ac(3jOy zjM5|)A$UMdQT7a_Zq?UFctR58C|jPYobfjN`9wP!vFb|P=YIXd^=O##U|97NR7%#% zGXs}jRC&b#|C+yK-rzbz@c`wQjN~LY-mN7NQPRq<#!q56rxL2m3xXVK)eos%Jy#8c z(t<{$|HgJ>kB+a2*A)IhXzF}k*d+mOYXrc31`dhwEOZ`rc(QCCt~}#ECb;Dj9cG&{ z{7(9Bv?bj1fhQ29kXwYYTGxJnE$P#({xotGbk|iiE>VD4Twr_5Fh9P|%B|3}`ty#^ z4yDCc^0RwD)79Xfl$ATw_7AJex%I!V3YwXlBJG)38*9y)`0CJa4_9W-8y16paR@e>9`2j{ z^qCu4AWBOy(3H^9@Z?pN#Nv3e4vX!fZ0#-ngTW=G=gg5+JV_rH=Am+6C}UdDiU z_r0Td5-fSpr}_gd06ekFWfXvM^OQ;d?3nM1X(Vnc&%3FyzMoH`vx!a$@6I3n>kY%Ry30 zS^_6Znio}NPo=NWHQ~2o+()^>Z+y0~I^qhYxAF`Nzgvt!+$t@Uf?$xIuE_~*t2364 zWK4skzs7&tpwqy0d(VBX_G$&_PuX1+(WMgkuGhB>O9Fj2&2rkK#K3qXj2NKV;?)u? z_}P6T_$J9A&0{J{-csqs?zC#F`P=D|-s;g=iX1?h8ihwyTQp}SSbk89)vZpd_kq8V z%>1r1)wq~nUO_(yzbLj8s5F{<@;{3>@DG1vO5ji@30Q&xd{*521IvM zX3`r*KBJPYw>Ibb^1#ywqGB6RIeO}6 z03>h-={r*Jm0-D+BJQ1rIX@CSN+ki7tiITxGisL6*ptK)bX596{*fTStDx>oe0W#z z(@U(YUUNvUa9P!wE04`^omz+(yQC<-$p?Jny6|J)N<<`0)c3)ZC1Y>TYTvUbA*V_=j4u56p>7OL7eZMq ze?#vsdX0v3;3f*>0j5i2fS|O%~p5kk$UZDc5h3Z$tyJ~OTKsjiz?M} zXJvI7v(J6&qbsvSd*u#IIkMOr)H;V-dw*}g7_Slct^n9o%3+UyY{1q=q9~k_qt~O-_c5NtW0W$^Sc3kP0HWQDbTf~ zo3cRUN#T3nwXk7Upddsjf$udn%>UcE;dqe{|tXWoB8Wud!aF>SL>RJ?OhY;hT4 zXKe2Cxid@Y(9?SID(38R@*^Pi2V}suYKA=fE7L%`jgoCZ1=qatW}4{l2y@E#<;xh4 zn=ZiJKS8lFT5H(OkVAihH!I}l=Sl3&yPaYq<-X~$xmwDfIu&7|&o{~(@bB3MFY;?@ zxjT+H?4bi^#9S;A#tpk5^;^$BQAJ_C-ugieHIKl|Op}GuD{g!iQ985mvkNfb64_|4 zO~t-`_2={ex(c{mVv9Bq&r*jC;vE@Ife9zVld1ES5PpAnhzz$D|Mi?30qw{fmBsSkt#v)*CEsr0 z%{T}zr|(na%?iiw^0`L&pX45xH$J4sT z7^y)%PXi(EB;R)9_(W^*F1~~3e?&+!qo-ad8;_>8{w?%5e}V*VR+Bx+P^?s0nHH$F z%DI%R&i31`YTN-e3DU78=`S_l$MFcau%$M=jD+MO}|P#h$O2Z_54&paN9SQ*>}0&% z@i;)Dc1v+AY5zP{a$Nq}`l(OlDG#>~QmE^x6hdW9WQgwIZ)xj>r?UqRuvs6U{9WFk zBN;Pb_`{h_jC}r|<^1_>;0ZNd1Rhp(!rQn1L(CJv1>EjC#Vm#Yo+tn;?g*Gq5i@DR z%=|yZJg2gO8u{jW#|q|uoap!EtQ!Ki(a+n)`hUz4IG+eiwjkMp_u2mb<-Zmr)9eJO zj{aYKfz3fA)Q+`ESG;CU8a~Uiy_hAJJ0n#fxs?w`FTnfP`lOOfX@!59$LlZ|J@vNq zO9Bm98Gjzp36dnfNKTI{A>wrD=pA!;G8!WGzL?+2>o6AK3GJiS5ZSCo%6r(+M#Is` zex-N^eY5JiYV>?N-4csnf?Cu});pzfjc_(TK0dYa4fD8cy0$qFlD?6fq`OW-YjGA5a1OfoF=ImX=R++z8BouaG<3J5uR& zKQfJwxH)L6tp>Dgx*lyv#(VBpd#4(Xli35tbS#{IO=pZEpwWMo_v%@H!%+`g9#b_| zE=>m2NXAJY^&1({1djp7J@@!0lRh`-7JPV#o;x;NMinIvt6gf$txIj`Dlt0M@MP(! zabi1h!oMv(M$qaU7oe^II`^r5M1a10`I`A)-eq%xajmxd`Q$CVnwJuSQ>FqNElzV= z8?ti~(&MwBqi>oK?I>At_yp$d!U0AwqIOqXG~kMZX3}PsF|OXhydzD`gy{TIHBH)N zf@L>*Ku~jLO!Mmx>N)5~r`F8cXAZ;;om;b@l}lCVUggFkf-Bvld>&EZJWksTrH=mH zrHaom(?b~Vvn6D@V%>WOBa58cxMGjaV#m4^#!@nBZ6AbE!BLH=@6r&zDVRx$G!!uJ(#z zOY6KKkwp!lw!=XdHV(Tm-Uphk&wa%~tz4O4qBo#`HUL_^nOG$t zk+~ebL!~&@=)^?pS5_H~IMsGtZmS-Il&_RuNd?w91)FhbZB}GC&b(XmEGHl@*)3m1 zwdzuzd!;pM>`JjdEV(kpI#9cT0IBB9An@*}p0N91eL^jUlgIm|=`UM10`{M8h5xua z&Lmfxg!PgK?Rlx&MdP)3hJxQu_0c{687y@>Vl6i6voxBFp4H~-d!PqC1$|4LihBRF ze#aiv@scIre)yXMl8w#zlDnIovpsFOnlOu&cxZp^{P<>HN_so#_?0S= zj3ED;zIEG8?dY$xYole}6Me{{wSgJ~o}~SqJr@9OxhyW&nWd`6Lm`4trwLOH#1yVw!_ zh@*_A(hm2y!4@ZK7xmfS4a#_AnnPIrwja4xa;h^YjW+$gOQmP$;^eRcYayaLjH4g2 zx+3QJWU71@w)aYMK9Dj^>+a1Ip|0e^jN7Jg4TJHC2W94MH}Go*&%9E)BHx?NcWKp8 z+ljU<$JrTKHSEvvJbimb2_>~gfThb;;_jmt6XI&cxa-9NC-1GR->2*22JtM1WHKTC{Nst4`T zGncAQ*K-1-k#boNzH4ev$~EqzTchBVuXjx)(-Zk$%GBl*pv9WggUf&W+^zEj+@m{k z4&)jwaY(5`0n-%i*0UqWhhXjFjzgXAX zIs}RXAE~$`lUvQq(y{q}K5{1pUS(8%C^%RohrSRk=%L=%r-A9D2TD+!fY{xMk&-H= z_-%xm#@lL9))aTafP8(826tichIOO^6*ia)Dw3n5pqdpwGsD-8*0j>-a2cd;BE`{+ z96fw@QE5lw+%G`;0ocE4X#Uc9f$~#}`djv(CsOr)ItR`0?Y- zF*|SymoH-%NTaE3&I$I8Ph=v?NsIN<2b(C-?{#$N3Oe7M>)cvD(;_V#gL!R7Zc2$p*L^bFToC9`5iwzm%hVc)?*Cd)t5vE@R!k4y)l93e#nLT9Xx9`7gvhf*_Yz}4G$v|x zj~J0}J`(SU64&O>!vmr(sS8`LES3`cO1EFbC`cf}AP ztT0wXkI|?7c5!L&wx6=b^ZUoxgUH-Z`UiVl%Otx{R3IB0>D)$c1QNk;1kqfM$90aX zt@-Z8Q(lX^z`BuXCn{W57~-ou7D@BA11O)JkMd6MosT!~U7{(N=5$Ii>pQsTxly>d z5}bKTP?)b$Xyt2_ z1BXfzeg_Q|I#0HIw->I}smDsumTZlC9GAgoX>N5;#n`=_(?N5mcK-Co_xaDqYi_1Q z=w0{?v5z4N)6HL4B&Le9$U-$dO{LGq!EB9a-s^GI-1`+UkaoZtRWej3LVlWS! zT-W691P+=jU}7I^P%CC@fp|fILYjdXm{!FL+L5NcmoQnbRSa%%Vsk)+2nj!-DeLPs zxwFmlz5U)4<1tLZ&o~Y7-?2VLa+2+Xe;wyj@V-OnO?;Jw&_BkA1z^tio(RF+%D~*> zR)bxsFUxUH7Jx-^N!*^NqyRbad2S9hZIh=&Cls$mT-}=F7fc)Nkox&7iO2ECRb+t9 z->~7D8KZ~?gJ{sN9XmjVLGeKP<0+ToKOxNTy(*@8d2%R;bA>mGF3cXzA+7$?S=HQb zW>z1g{EOX~xTpB-?pavFZk-Ld#$ZMbyVh6(;msjC@}=gKzel!%RapG%6aI5FNd=5# zYY-Fu8w>wuefN0*;GM(I5B)!FG*6!L;RmL&?JW%a-+^Qlrz!#5{h_fQa_)cN?gqfY zPNrzy{Ks?teFf(S0MidV-0TY= zHS{0r{_|%iNKOK_%r(V-{pS>}fBo%|3J_rW8`Bv4OB{0y9Z%H*hW@hYHRP|M{^w|& z9|$lBZj#o27>|FP_x$>?lP6s0|G)2tOmh=ZNpgVP`SAaV3*<72xUFBN_~OOFhIv=A z@SEh3e`T(J-li&}LcL{MwN%^Y>x> zIhvBElC#tzHK=mrBb9!^tMt4j;!1*P0&l_yVzKoool*Jd`#xI>en%}x-9u4NFHufG zLFKCN1dFVSw6w?*9QI^ z$#9b}iG0abEWM#o@+g2!IxbHq)*H7QJ8eo6M_s3@Qk`b3Ve^i7*=uoU&!Qu{u*?U2 zCP~a;S6H;bex`-fB;0s(Y;>X_k-i!+YPm;}t(wYl7-OL9R6Dv_@gh@)v*lkV4g44o zcD}WQR(&;`N%)B8V5XdZ{(#do_8c#$?N!4S_wYGS|8%UFZ416J0-i?N-2RehubBpU`F4Uim)PwH243@unwhwz`IsN9%5N zPCx3o`Wv5#CsiLQq?n@W&E~bt$`#lq2g3JcYuqAl`pqr_VYTS=f3S{!p2$mn zG6D9f*m_dw?n5OLZ!~qCxLs92(TR6bxelvkN&O#Py~QtrNrgZcl#wD{hj0AbFl&S! z?O47F{2q+R?b&r>Lj9UT$1-vF)u)!!%+E=Cs*mz*cepQHWVP_X+=3an*|mSW`eVc` zDYYIhA490$98%HvsNqRT)pYqrxH`O3gqq{pcC}_`wP}GeFrkJNShGS z37@-4W?eR>szaAhyFlfHAzdLXlhQqW-!ienW^$A(`G540ZibO031oF^(!8Z%zTQ*6 zo=N>_%u${KO_9UT85RjteT=T$eCu=lY9l^GtH?l!AUMTTx7#P_>(%<{Aaou_shRmK z;hf0i#5gX~ck8{#3&AHp?y=-7dx5Pycdz!KXjPe0`!Wx_AWK{d5tm~cfj&`y1T@UN zCu;mus8nQ-m-N%E8sccF156bvbX?cKR$XZ~@3)kPl@l~wuOyy}y_@kv8R}IzH-6tq zB_Qx1%Qd3Td}PMdypwTKafBq!95o#-a()|)lbs7cneghd?nQTtmRK=52VBL9D0w|F zO(iUfdyqTyJ%#2?>H-I&CZ!+#l6RG8fs{=o&E%mThmW8VL`yIIv^NSw+}cENsT-?m zoN)T{{sf$$501ZN z8m2xjKKc?dB51{~(0w_&CydbN_R%j*5bWhqJE?wE+<$fR%>de2{SfojKLIFb8LpO< zX5uR?eH=1+*q*I*Sb&@7IeO}tUKj1332YI17Y{=YIn6< z-LBTmIU7J3+vbtAFSViZiD!7u9OS!P0m#Su8VAYx5h+5EbD49Wxv?GaK@4`xH%7O5H$@IcBA)KZO655+_>vdze*ZK$*XI|} zF)f?$isvv>q^tY|;ew$_)yl|;gItdrwuo z^s_`;>E>jsQr|-drzTh0wZ_)QtRRr|o(mNto0$IjA#cQSf7!>gZ#SnhXok7my}CKS zN=J@_TD5k*L3Da{v0uM_UrHGKmPN!X(nr*;-nhf&M{H8j!%|@iU{aF$f!`6mVH&|} z5g}@eiSHJ2EY$-YTnmFU;r{p6{V`4D5-KYXfwJH!pQ zcO~+NTXNm|I;SyYVR{(!D301F94fGoSH>BRo2b-@!+F~OhoS#?;{!P3<}a50ydufR zqmj5LH&Y?>(J?~3nbL4_#BoVl4hQti;uZV;dS*7;!B&Yk4&S*$LPN_nSA~W%7i~(= zQH>E|58Ior_!~(1`UiKx#D$kK?4;38$FV8I4G27dzlnXVX(Q^gq{{$#QR;_;s@cXO zyd2~HZu}T7TEb~%L(kJ=>hve@57$7o!?`+}$*%L%%(XDxH2=ZMh1gvlR9uuYCFREI z$VwHT%S3}Lc%;!S$oN$|mmzMW&HL5-S_GlPP2CpjYVnI-ItJ_%0w;Y*&i~5O2FP5X znC~enNsT5hU#xTL9YmmQ;4gP~3sIU5_W|6RrPz0)%&}`8R-F|qR;S;QYn~Jh^b0&E z4(H19u^EweS#cH&FAA}4&Q<^3!PCWfORrhgBQ$DUd@nOA7&4!0Ql1PbQg+}dXpwST z*iRq`{Ad1BCT+}dx<)hl$$=NAt4;ai88N2ulsg1j?RgVk`dy#iUZ zx%A7r#3bv{psFcK@FMK7fTM z@`>iMqrGSj>;H;EOpl|Gj<^r3e*n^-l>!OkNQ(2$%Cm2bDqZ3(OF`sB3MLbO2)6!r z0`B$O=EAS-%23A8AbBe^FdQgT38C$G)~x5; z;p!Jikh7#p2)_L!y8Jk?rGFu2iIa*IoRe zILC-qBS)!wuFiPv>y0lf8wC$DjBvvMPVNQ%rs>{C8=2z20qik;e#u}>Q1j$M+)NJ+ zQL=ew>~G?t8y>}*el_Zi|0thv^+5D6^`eDF<=}1&c^2HUh%1N@8gCPhRH>`gwkL#g z(#gG2PM&$s-s({~)H>4e`~oc?j7&ul^^Vxh$cZ!1vKI?P7)MBauP2!0Y zm%Wta?rS++V7Vl?7D4pW;XoSlgd`a0A20L#h4stKG}lkS|5l&>jJ(QZ#uGwSlIpgU z#zIc%69ybd1ZIM%uNbSO^L=M;cj8}-dB)SD3~8jW5&Ia(n~e|56B&k>qnY#%_toiM z!*8*R&npRp^GSeI-xo_+^+@~QGT9vJ=BP9^GltpKo=bftbBE&27p_Z1H+=t*Bs~!j zdGQvC2bde0nU_C5_BqsD{L3l-_6cuKe=jpXxOe{LL~jip>R}#VzJUaZ!Lm`rR*H=~ zr4@hMfIMae=U{pzu77+>Y30Y3KfRN@fSOH-`UScR^HX5XtZ}e*rZvWh<3#0NgoOR| zmolx#=QrmQ{x!wt_aXw!=zDjf`HU4IWGNL9rx(*3^YWiKP7)vuoM0uOQ6z*p^}WU! zQM>yiJBgjOoYf}I&sC%ayvU-i;Dg zptL+JQeteP{#pjBMkeD$ldv=P?90Z_b(G!Qsp#3;&<9}cbZ7;ATIdvK<{gJ`mFQl9ek53$T2h==ntxhiF=%Ix_K*iIY17N00(tJB-F9X=KP3ML|_iBjB z;~N4BuO%7$LD#=aOq^SSfS{HN4^9MBP)e|l+;m_q$l z!cVg1nkJbBm59xEB%QSjvwF#|xjY(lW_{}ekbD2_)P7Mok&XbfgXWk$^oW*IPbrHsasGn|I(|ui{+P)DP*B3G@Biko|Nd5{A>caUZTJ4B;HKm$ z*c6Y#bK*h2Y_Z1j7-QU+f^=0=bN(fA_chG?ht*AW6pVkJ#Hd>4mcKi#Gkg^lL~>rS zPDW2pZ&Wab?)L@|O;JE;m^>%>JKkCM`1Pf8YaX0fD&~L{v9b=LBr%bmU;DQ8p`5}m zc(b0>{AC^OBLCD^pWW@(>Yckt4wJIdjXx#>rnMXgo0MX_rC;^BaI1OZl~i%cnO|$B zAP=}F#t*~=r)71QesX>?6ckHV@qX@aY&SbzBCt?Hl{b0w#kl0yd@Xry-QpMH^F~f5 zANM_WQ3*|9H{7+q63VTcTTrpquk((c|2El~`GmJ>j+=@6H$C9Uq^Nn2#ldK!SrkeC z%&w(IxrrN$(r?co+eZlyb(OHOvF#h91mO3*ialzpGv>O|Ic49uH>EEN6|jdJinK?% zaQz&t1IS#4-r{eV%GG~Xgf-VeDlQ$v1*Vg%Bbn*ihe;RPxxSLB*KW;4x1{;$jZV0s zglm_XMcF%IBag^7jS&Z{oZ7ki`nH3`=tj(2d#kn!yy3~hk(X=BOtt;r8;NCV_ME$6 zX_JoN1gm`fL^eOfkc6`Uc|vuCxuN1^mL1$Q5)%rW7h5qP1ajASwL#@w!fkGabF6Na zpC?6><9OJ`tFgP)mKMt1Ps=PzHBY-SKapqK!{u6U+|UTK31RxI%mh4nndUP~(o?1p zK1P%Q}B=(nPaH-N$q78 zAd1|a-A-pG%nU{phlhpTIT>HV)*{qPfAJ#w^XJb6W@7rH&!7 z-PL;wgBq*>DOV$;pl2FTqcKXjhct}9_|h`?9ZU?$pn!(WMzCDBu#G-}7|yPIhp}3> z1Wcr>v=|=Ibg^YbQ`(p_IXpGnf%(HgRqdn`&_hmhD^|5~(yAv_<$0#It%bOQvqx{! zBA}&EBJ93r72}hgI2a=|EQzf;3^bzgbO~SZtA)OTP3g~y1L85mXvxLFPL-xgiJxJg z%oN2E{hQipI*TIJv#Ih3jbHBPf#Nm5JV9AD_`Gn9N*#+_t-`3o=k9)jND>+N8H!YG@!7+cR<1 zhLnUAKI!=CR`S7INQ}Tdipj-!^18QU(U{oW$EF=Y(_Yu%J@M+*c%^3?btEe8umiWr zuaXwX-Eje{Ww3if)k73<10!pHURB}B&Z>XJSwT13DHuYM^_zZ-0%{vUvt`lM?zTG? zL>SVAK(DaoR$47eB^}`H>rDO8j5{VYJCQyITcfT96?V1}l80LU2h(-Zn_`?=0-7YK zv5|gq1Y{Cu%ndYN9R3iRCg`~$G*b2?rE7vnJWM*#>dCaRk+AEFH^e9b|Jq?r=gQZ1 zbVuH0v3o+j!Zq>~a0XeH>^7(kk&!B!y4Ye3b$M#$7<0DAO)XxiP48{2t?8ui!A;z9 zSEE{s4v?JJ_6ExM_b{I>mMy{Z)1@+Ysxbqjr|jHSYy^4L5UW`4FwAATdtyAkSI`!F zZgs^m_Il+?+2BN*N!Yf^THk_0%J8*da$J@Dm}SrNY`Cx;LT`E8sHI9JqWHi8+L*9-RAHHBRW-0CA0` zHM=sU5MQ4}HJ2*JpXCSooM9k3Sb_wTYK~S}Q27aK-obRNGeh_@)XgL}+#dVU=YxU*^8XR9?83F_EE8l&8V} zAV0a|^mGeesPRelIZ{ZR`keYDJhZ{8@`Mw%PNUL#+*aCg?v%Ham!OJWt@=Uz-_7ag z{&qZbUOkt`;8XBY@Kx%|D*~E9+f)UxZ5)3-mT_0$qH^6iq~jw5IW1)FjkoP6yi}`L zbC8@-%JW%{YWlqnb+GH^5~Ch#si5Bg8`JjC9dY*kqk z!(|Yx&SWRlzxJJl(OO4}Tbxkb-IL5}7Cf<~p9?fC+0^%>pbwE}4{e9_SUUSoB$9*M zb@{Bemie(L0lGUW&qz+H_xDM#dRHWnkZpA!zc$4U@(i{2NZ&5GnJ?w#P(nQ*=DAZT zho0&GXefSwM+#RtWd!*=!y&f7<5-HT_%B6vcWv5D@1(O%f3#b;myrh5+`3s@e*`p`MX`%*87sWYkj7S#T9!6e zZ8mkPx?)!nio~)rP6)5#4uUe~zLnCsfSVlM*>uYab{)D>@=z~75TMKWbRMvJiYbkk zT{>(tdQGM6cBLOD|EoVEGXDT7Ks)g9BqudP>N`n?CcR=2Q}_;5A?!`rv#wigtN~he zg!z}T>cai?Jn;l~?P5sgfK!J~8;DM*AMI?|WgYv*HKAq`S}WkI)KnAn=1s@9Vh+@Y zS26b$x|ed8pti7^o{&CE>Jm0Ei(0c1QlvkMX*aYYDZauJuaj#gzirL!B>&vrsC-~C9KSvI* zOh}i) zw7ToVwkHzCGeNwuu^=j<>D8+j)YF1u&RAJnf%os68uX$i_c;&e5j4?*vwG?nc&}PG zGm`cP;50kJKD8DCI1OJ!JTQVGe-KM0*8=K;o{u8=S#S)tSqn{ctzAkvLbfI{f&dXDdj_y$I^yDYb9S6J!As#;WvNMj^e;K}j8r3% z%|wXGd+6t@x=%M^KVB-^*%`3pBSBx$SuSBFFBPS+Mh*n$!`ivHw0`MIhZ62Xj}Wv$ z>WI`d(NLIVje?8JLO!R8;ve;?+L~~0%*n}V3F2ru^;yf__=Fqw4$H+Ncp5zgmN0O2 zm?sPs+MT*S;rzv7{>mAsWJ~C_r13W-qZAXW2r5Ww1Nn)J-mV+w_Uk%~5$EVL4&1ze z!rDZ$d4tX^ZqRHPjgp~%0!*@}g{w<%8mXwaCpD!U7w}*huvByxM(9^&{}vj#KQ_N3O~iwg(9Xld)ERLc zdpF4UJ$R7uR?3!_eyB+~Cy(HOFh(?oUITi`gN`^cE^c`d8(UgrUEI({UEW&Yd65v3 zgw$mV?9r`?uVvE8ofqRanJpGyvj={a(Wz`<0~2R^y`{~ z=>_!Dofd)1`$q7ftg?Zp{Gl9ME%Suq;+_(=Ok-f$%w0U+jTLqb_w{ISOnRqNI{v{v zV0mFnjs_w(jOwM=cM7xy@$S$$p%08WDyxQ~B3;_@D?)721YX<(`x369fY4X{6AI{M z1XMe|5$vNwC_}et0*zvF6yEpXTn_i(i=A(& zh^#OX_KCoLyVp~!D^ar58=FZDRgru!9#h1(V}@5c1^R#8tH{|;(zq*QLCO(DYihVB z9Qm#3?II&<-i=e%%$bjW@HgNo3R-!yA)XJe+{f?-7yn|M}y=( zk8*N32}o&<=}NE{dhsSUcqSF8&M9Y9kt;z2%8C22K?CP&JB(2le4nV@6{&=~_`GD~ z7IwM7M<1PprroA!QYaammQJuew8b)-0Ojc%01Ro74s5y~;AD50t&3|hkBB+8@WuzkIyA~0A+NL5JK3vG2npVMhQ*DLH9#wwH zv4@3My8Q#9;8kIX$OoK7K}bTs@j?Zw#@3B_IVxNE{#dUF>HH8XnctpygSU93_o*m{ zfMwOLN58To5w@LO(K1%&2a_=2{Vjn`@5{4rFhXr@%&6=!IhC1FZkNo*z>rjEE)C=V zru*tRhBO-t;|2y|N&RNPvYqvY$nw?3oQ5m$y9I=FNOKmM2JwHy+Lt#(pi)$5rl=z2JMs?wt&WDDBxe~?(L^E90D@<~R(5_@6j<3n|LtUL6>&0bS(VN{= zwv5=O7(EH)5g-$FQ}EK$9o7B^=Mefu?#a2pz!N{qHH`j3f_tKh)jJq+l$J+HV(PAgA|IG`&&}@r!P8Rs=5O_#5 zBo1ShJ%Ie2(48|E9jg@}Uxht9TYOmshyuLStUy>=TgKv`*cc75pY-UZk zKuxTl^GY1oJu^I2bqZRdqn>C~%HfTd4G4bt^0qeGF$%$46#HVcDrPkg9V(bGNw$3k zE(jS~$+@t!pJd7nzpMA=1TD0w8?C`UL|s^$_f$j@qfFzY`_A$^4lPdpQjs0oI}FP1 zgikg)Mdo&F&l*`hrevu;kuUDR46oIHHSd;KCU&*Eg(D>lC~cX0F}oz|fox;fo+GHs zoHBN=L!`yL*by2U@RO_kudGIK2#WT`6}>t-a!~0PX2Pj#b+YCci{XRP3QGyo3g4E$ zUGkwzgT_HqWSi&Y@)RV)4L{={iFAJa>|e?|fF}$>rGPxK25c5+vbkS$6un-Z_AQu; z&o*}~mpv9bjK7EbK>ayeUj8tp?QW0b_ky-tQ4yv;T7);Kr=3;W$bD?OI_&dVb2Cp; zyX!zVv4#gQ)ApF&e;>;*3P;1=y|lDc^tV~J-Q^o-v`&c#s}YVotb=Te=VSZ;L(Ig1jpk}F- z{i7f$>)Ezd%Bzqdxe%O}nZGcvEcWTS<3)w`|8&(aiX7L-K(u2}D z%+=|JsXxgi|L<|R<`xZ_i`z((*5^ae5gsSgSLJd3KS|@2Nnx=bVLE-3UHnu$2U!OO zoxAv5QlVg78zDnNW*El#8GcntK0u^#DVg#5t2e^pw0Wi#E%WihJjM*xI)YK90J%!D z!%Y{Y6Lft4{Rxx%6n5?eA91;$p{8cBwbhJxNb?sB4;zS?N@?jg;2RmgMOOc0R6|3n zk5(zt;oPka5KWC9Kue(ZE;3d2sPua$a_icXm`7{^#3C(HnQH1}yGgKnLj(T&wH-?G zldo<5SpDG@y3{SbQYtk}jNKR;SG!LM5f5L7j3mCj-8X4CW3t^WzlRq+5%zui_wrLE0m4VVM|&?^nz=WnIieA^#?K5j&I0OdW~ zpxpF~zww3@=wJ$58&dSv`I{BR%6JPHK-x#$aUp+AqphNf&z67?6qQQw19z{C$&spXQ(&QC(Gmnv#K%goK3p)ywCaBqSFD zNk}fFQIHdlh%oj(CLtjO**tru{_5E?R&^IgOB*{25)#fBNOY^xQ&QSc8KoOfi{x^$ z+q1v${l|_JlmXFH46sD7gNI`ln z$4mW2T0p&o1-{MK_{qM}af3$urmqs@ffhh#4>Vb5>wSZldA?gwL&ys>UaZ79&lERG zEiEWEJhywK%*#OAa$`eNNKx}pV?6zEzA(UvA718f`EXQ1lWonTy#Hout6GP^mn^*{?&dhu3#I^fr@$I)gABzD){Z;O*0*cT;SS+JjdXnV=! zoSA=@g*o0*`)f>FD^cH1Ec9Pls;H1~6F*askcQciTqJ%XCB7JlFA@^6EVBO|xfqyr z;lH0Rr2YQ!nJvMRghZC))pI#*Ptx`2OKFT9)92fnIM>Z)k12_Y@q*h-^094^6S<=q#?cdsgZ;<>rG3@gyjFq`CIHjlIvanyb!DUPokJ~yG%cq@BSq} z@$1FFC*=PeaL^lmS(2jUO*4^ef9aF>m6hbjqkmqAg4UHNW{2;k8n1s!N0(3ec7wgPBHFBTfw9k16_(9 zlK!u%vhpaelYE-oZT8LtKNDcNBaGw~+T|q8y2Gvf4+9CxVQsr5EB{(RL*bwHF?Z!6 z1xY$7bLqW*PM^FS(c(46l@K@n_vXLn<;ex22v&hzC4bAtO4I|<9JFZ zegJ0p^{8e@+F10)SAi>AI~9|9n7T<~5*~R^&0-so`nXygg4Lg>w!pOoU8wUxIiyN~BTtD_YrpV5;??p&$D5_z zgc>8q)`|C5;6K;K%E5FzI<*&$uZUrczY*Wa#2_7NSYPWjVxtqwQz z4%9%;#4mLEe2&GXtQ6hm;eeLv+I!2v*|r_R5-_+XT9kz4HkV;`oIPAi7x5UrQmWmk z8EGL=kDGURm1*NKb4qYuD>HEWd6(mG;QD}3)wCxu_TGC`fGYpf<72xYbl=P0 zbWjf0xHio+ILA$&_WRhMH34G0WX_RlI1%97N7YQ<#{)B7$5QZff!4t)zIlslOpKC_krOq}f^77maSEq8br3;ybykL&-WBV@Ez38p>Cm5ZCjpxcl05sT9=TOM{k*xMsMfB3h671t4?5iJg92$Fho%-H}x$dQGQvnq+c2tjTt~D2Qd%@5YE3}LgsvW%!XF?GkR|8JXpNF7BZMIA zj!O_V+}t?DoynYZsHT5n(qA1fHqTqLrAAixp}Vmw9F@&G?^&$HABxqxnjUWkWDL|L zlZj#QjPZ!{&PW!SjdX!0j?vS;r%85O9p7=^&XSVJ<06wG9H-jj^-PHAkLEY{zaqJ;>cWePA?mjNh!EJz2vaglC@h&T6ciMxAzDa=AGT z;^kb3YV~1NXs)xH`E{buWq%owBfo0~VZQN~Q|1&Q23gIoJdWqRfa>ZX`KA=AEtiex2cb75IyLp6ztF2HE&d-GM*ePI6HSCKAO}$G!;l= zk+8mrczxb-7-CRwJ1wZsi_SHaS2=Ohtg``Zzgc*HxNyRGZlRO}eEw)L=r0H(8}r-p zgQsafd(outTZ*133%&{gvV3H);z-%oZ2r8KU?iT{cS26h_``U=J4RWiMeS#b)k6&r z{XRYm<{s%t4TnC#e;9NQ5#VqFe4ISejB@M2DE8tJZrw67qC;1ywUiR|sabs4Y!FJ10aEem{30q-*)afxV$ zPhtYe$uTjnX9?}+7X=oiHzpf-mYc*O;A7Gx^M37_h}5)nlIXRXfb$@IWc4{=15^-{ zf+sJA>l`saZ+^2CuWy*|U2a=?(X&^0#^<99tNL@C?G>@np4`k4E*G zvv?XGqD#ibT{bTRzEr&xokGoEZU?IXxRZ7{fXmO;q0&u{%_U z9~^tg%Gf1sxid_;q(Q;ICkS4yhh6}S^^?p=0y zLHf3%%yB?#t%r6HaVrgn>LI^TRq(y4ovQ`7+16Rza%1}{hsuhk zmh;@ragP9}N_gt{Ynd!Jlih2*T-^!)xJkg03jAo2$K28g?#_3-aG`ZS;1SS?W{EIl zdTz&O__$tyg`m*XksSHg3Lzo8O9~4NeMq@eyy3my#&VYSuussU_lMn$I4Ak~w(r-@ zgi#%|yv0N$(Xu-zyz4t~GK>#+!zlWL`GJIhf&aolCC0)zQT{Ikm-U>8&K2i3&N1fa zRgX6Pk}Gc_1UWS{MT?5#WRz<4C;M9uN>c3xH>3y^s4DZW^);S~KKxPHhrfm=`-+CR zfPYE(xX+MDCiQ$qb)8=l%qLL}=LLMmUpc$>^l97}54O!^B7{rT z3+-p?rU$T)p#EfOp}{4Gnc9wuTkg=}P`b;1`F_%ivBZ1OF1a0U#T0RQ1vG;74vcES z50`H|tEeRcr9t8AdDX%X_o4!}MjJw3)L$7f8`QhXVjpmfHFrN8ns30`oTOJyxrbFg zD+@i?m|#pPlhu$b0Kq*l-5ZD73+@Zh`bp4i6H2#2Kf&C8?;Q^9G0qa;(+YB$$+Bx_ ziEyUnv9Pr4{F3H08ACRHewpg^KSoGtkfjZ!gis#XwnmF)718MtApm@_T5hc6nMA2p zePlwV`pj4*+RW8z5Q?8Yo%<|mHIxrDigJagI+UBVFO_e*v#TWA2}x%{I=u=7LpoI$ z&;;O>uXQ-Ie*YKYa$mwu1s#IksNTJK^30ITdD=JjH>@K5J28m#e|(}-0om>RK7;8`{bjnL?{&+4nM#v2kb;Uy z_z7m26q~T9&3eBS2|H9wf8FVpxP<@CN(z0?5u1A8LfOR)H<8A+i?=QmwQ=@P{GS4Y zVmZ{I#(ZF1Xq<4x}V31|EV44i<8K8ESU_>u78G5 z=YO8_vW;WlS4d=gu%Y(mFETkL`_&Db7%xdVR>QyrUc+}NibXY||3913r}|^K zH+j@Mphv5kn5U=_t3p1sFyCsM+T&z0IeYLvgfGMveZrJENosgOo+e=hb;pX$8(*`{ zb)8d7I(Nj6t2>GJPXv*5ixN>b)aH$3SvEQ?8yEVnnjs#uS&o3Aua!eJHW3uN4bJFANM}sT?x_S|} z-Npg_u(9TG>g&xjkr_{n&Aj3?hH2)W@OU48W&l!?I$SV)d_V*JBfpy~qbfq9cBX~w z`q~o!CcvfQCLQwPw^UAP3G1hWV}+_jm8{wI-2Ou)Z|-LWc?vH_k^89XXwGmyvxreF zAn7vbncB32%oDi|?mJeBa{uRlvbrf(=%--lBywL>h~{l>C5u?a#NYp)oVBx`r+P@P zKA)=fXJC<6%u6;Oc9U&VX@8}$6B%o;xvy5|wLouFZJm-X4&mnd*2HWDyux5c#~nWygCFVWv;Q{UBUS(`yd3f6 zacz6jwd(Cm%2n!u&Su0j9P zgN&L{bSuD$>b^|ZgvEFAlwiSfbnRZi1p)EF_mK^cS5>t>@kY z(w=x4bPFWx+Ed8-h_wDRsH%8K$X@gTTV%BkC~6~ z>a`yE(c}JIxPIJKKqkF#{~J}QazZn~Y@?jI0%jR+YCpx1IT=3Kx}F)Z8Q^a%Y&-UK zxj)sQW?994zeL#Io9KkmeyxN?E?J!lV=;7gvX!&5Ehx zL~RHCPouY3l)v)~#oBGK(W&Bz9w#y5q6|%W`UkPe`GpMGi>ji&RLyS}Hy!-!(zOX< zrVfUZ^@}z2vP1b_VX9;KL$va+iQP*CKsU>kxo_ua_ zDLR*6l*=M6T%A&#ebNXoSNcggk0!2iQ1gs^vPI@_<+75X3{;;V7qa3u<74{K6R+vZ-|I1n_ z1rXho*sosO(YgfiM@WjFq-9gNN#nAcSXDi8aIy6%5yWX*BWU0KUtE?QrYR!ghpS9z zy%jpW6mAI(hU4ug5>9i0j`okh?QW@(&Yz@?_jW~WzAf67N*_gq`E4nb`~Bv^WYkD= zA7wU~`yKCB5dpjPHJL*_d{aHkg_nKlJ-1|Sp^rR=oIKi_kV(jVu$2!X0^{ca?K(RU zK(_`2)E(u253jj>u@~!HFebY8o=z2lMQsa&$SQi8G+}y}t*XZKyf6oA%U0zcd`|KF zcODO2((3ek+^)-Ov{bk%15v{c6{lZYd(NKWFX*?XoGR)QEwb(35a{vF<^~6k|7r$D`h7Ma`fD* z{nBFXS=p+q-CjiQdXCay)WyKZAg-oz1Fgf>hrgj+fr!N)$WHyoMIu}q96gi)K)o0; zG=U}qFQguYTzfq->{h+ z;%CNo=VbT_z=D?KQvw3Qd#l0FaX0%=EaTmgDZQ_ER$6GeF&ipOx6_i0G(4TJwKvj5 zmAfqU86=b#>?3!FMNMv-@a`Db`$r|MA9IgT^!i-=)Khb35~nSd5ehe z;zJ)yIo_9wDgx!Q37U;a$tU+5;9DK|1zXT}v&E6eM<%%O5IVxEqtX$@N@FCW#B3-W zs@Pl5?*?*-Rk0=HYA2TmXq)&)2|*t={`%4fa;p5iNuIxLvlB~ng~5_SAnX@;s_IRY zSq53m+EpF6s_u1;8+oOFO!*H1qh_0)Q#rOiBV5u7MLZ_~aT za;bXF=Leu#+Sk$Up%~iSp}hF9Nc6}T9q@g=tm^W0lediFTuI(9D!Z@<3Ma-xL_;jk zAIVS0)48qfQ{7^_6J3(x$(ISm)ii7XPZRG`F`te6!fHOlLBRWXx0sSKphVr~*PV*| z5ZH+C$^3Zt>@?mV&3uLqafqI_^e32Ix0#mbO{=q;__cY*y=Ye2%+`HpDWp_h_1Z!;8YwV}?^M4GXKN0N?@j1qST`Q@Xh>8wtbA3&UguiKcmP9c(T~6nZF#J+9h?xDA8H$Nkyt33q#e6Ht?A4z5I1)bC z{8r|u2Y*^VEpz7f*-zT<{ra~+YngD!&ZHkc3*d3J0I+={; zp07ut05yAg-q_RAY5k|L=HqwUG`I-8+1g^Qf(dM8)9cw5jM2d$2eYdOd8$=PO3D~q z&9POr?3`F=daWgWj#zrsV`_$H85+FAZQ@EOLaW2_Yi^&!$eTn71z44jmM0+ilsxK3 zGDqG+QcpmYC*c>**?iL2y4eMdF7RR9_ts(ywmsb8+pY}4HgEe4ANNcgaa4~1)o9gI z_ExRj+dEF}YvEYb&7`o6Uyw+6P2z^H?vPD2kJ3JJmfHi2FR$p+-%V6G4IE?;;9PO9 zvv3_VTbA;6-w#rs2H1r#nu!--`|C|N>qSSQ?6#$&yhkJ=Y6T2EaR8WQm1dRLclI zQ!LUJL#$xFQ9H*QpuVg$wE8t|+^O@Q_SQ-g%FhaY9zC4oaP*1E!x z>(L1IU!1!BgK}moe`!ov?aAfqL_DkGV!q0`l`E4^?huWgt1N*-y{fq;7I-I45C9dv zB7}GWF`+qgUx(VFw)oU~Y*euF1^#tRuqe{50?sqBcsW!46BT*(*hT2`wIMOEEEuWd!K#I7_>EID&zh&Q zM!J-P@)P>>6vi;GyeZ;Kbh9I|{r}_GNiVukzK!R%3i;yK(^>9QGPFTn;B3I};GP-* z!kdbU{2We8=(SJ`c~3pnR&V5D@Xc6{xXe>k```6-^gRc)*1_A|AVJLOGlV&6Fsh{- zf*tRhjVK|O-5ivY#rb8ScfB=C!Y+QAbfz!yo+^Yvd~Ty4JfqVVJgvn~Ln&M6RNMj| zwGx)V z+EQp-OWX*(aiU?c=ASrAti+)XZl1(EKVm17U4=Uq-TuP|+T8u>aQ+mm(H!2&s9(a9 zHzxCU@=lDGzOIUY&tgwnTzN#j9E8eXnLpUMyRivu^c)sFBg(Z#X}j zcJmKjgwusg$wST6<3efrNQK$jQNEzIu7(PJiU5M|zZXE?K<$Uedk~qBrqn8OvL4EK zu1t;9mbXAlu|E>p>zs{bMCQZ*1p`jZFI-x0s|s#T@CxZIFZV0=7x6(WxbOX#zZc%* zgLgpBby84nRBOK#UF6kGTUh7)rRDbjV$x*WFLOSu;czXbA_J|HTT5FKa|?m!pU52! zp2OX2Tpw?;Bvsy7xzWP^N?+eh%$LCEhbcjNx)cX z(u$Px7*FyyN^2GbF`=rn&-QsrnW7a7Ih2am5cyuw_&Pp;A2eBKZ(+JjC&?s@i6#8( z#k%LCciJ&aL`+BQpRk7M2xw-eD~L=+e#e%pcCe2@XV`R+&_ymISa1Tjh5HWT@_At> z-B$6MAvLlT@y!sq;*bu)W`FO(Py7NA>9cmam=n)nSze-*>2oVTW8EaX?PZ#OSgDa~ z3BgWp8iS{5V-om`#vq0{U?sjkpQJ6E_Nb~vyl@mOfhj_+8OIh*VMs^yL2;Y_%#>QlM*w$zaC_PO>Gv2-H#JDB#jTH%r4 z3KomFX>p#!^4jP8w(kSB^D$_p!RCze&wcMf>xOA*wQACbh5`ytwz&l`VT!~gxy>DQEs5!j5GXhjFG6~N8aE0P~&R}VEb(3Kmr?(ye=zUhDCcb9OCNA~qPH^F8J|qRZMjV)1p>lGZ zM?25-wff|n1-=8jaMNBdS_`lZh(}L64t$9%VvPZtfa53Q2kiWrBNn}fu3sXn_)_ba zi=G}zW$(rqw#uVSkDp?+AQMAks;~GS9z6-uEl)l6E3fdN%)83uoYH^hThXJ)W#?;l zP8x8s)(qSngNY3$r8e6i`WCy+JJ&}c(yNeSe16wGu!;5sc*R4#18W}WTc!BgL2uYb z6E!0ezv125?+WtK!Lu&8GgBVqON+wXHdmrznkz!*dGRwb3j`qE1LE-7V#kDQ2bIEGX>j8;G*=?#Xv%*yaXe5Nfk%L89F{oHuD zD_2;|YCj*q2UNtzmS)T3Y4FngIJnKA8|DF%(Duyc@P1T63>nOTq_fqNL0nn@%EI zw}yanP+Cnr3dpHOTVCz)(<0fxmNXul&p|}4fv~OW4r7f777Ck^S8pY4R+(0DBNc+U z=(fFB@LqYULVA6ccN%oUy_y4B;rMA&8=6l-j${?b9ApPFYulJU8=&*pUZao z8tAzdVNQCDuq;iluxm_Ju>G~ez!yZ9Jk_zIoJt_v0j~w~=$VR+Bry$aCNrWe!{9Hm z?v^Z9e*JA8vcw&F>D+eDV0!a$H_0Ql8KNy_kr|xXTi=djBQ}4+(s=;tx({?ZHH>37Uwn8{B zKuBjc`#%3Z@Hnnh4NA>_z1~w!cTPU2)obJFJ@M6hK8TeYXK##JYNkTVe^$aaC|TYS z@rgA{=~acOJNv*=hH$IE<8MVSIdPFUi2pWxI7n-~VigV*kRw0ZHrAc?DteU%Kw$ ztG!Fg3wVHrQB75Byx}mRoDrb>lIYRZjd(R`HyoTlv#SWaGe%H+#S9BXHdvO_1S2tU z)&jDa_7;K7$V(GoQ(jZpNIm7~E&Y->{=@HFM^f2yYGp?}b5nhd?@+A>z&Y)q?)7Jo z;ba57hOGRV5OMYdZPVM}vLdxE)g27!1a-?vtdsD)LGw%jo+U|B*zRYT%W7~yB0LcF zXc>!(ylWq)$y-V#aN7*Jw&+_4pxf$7k$3?{mdsx`Upp8d08G(RT-MClRsSlm@2{>- z5CYQj;MVd;#=1+x2)i|8Hz!5c$n){fqutIBm_b>5NV~1*`!r*HLaLpvw{Bwtty)dO*Uj=GpIrjuO3f{EH3Dhpk69)1G~w z<wa@v5cqhkDl*#g6xP12npGl%UO zIKYn0}5VD^^^jG}GC&U%~x0W*K@Hb2#NT2gsbCN|v* z{tk;cdt(9|u_e8k?!C9MZ1kq-=$BE9>8f7C*d0+=qV|#eu9k=KqJ=~WOg251pVuO{ z2_de$JiLUxrL8UpWU>WiQ#~UTv3&{V;IsG*8{;VSZ?LQ`7!y!6a2Pz>-{YnW#JXiv2HZg?UX5E_|P0`zvo) zqIzs27Om3G)JU(CGuGzy{QT*5EQ6)>s6by+>|w_A#LpH6sLjHD3D-M@JYWQ+m4wQd z*`xe5`&b|XI@kwn&6`5TfwCcO*znf1lB%R{<=2xM{hYp+=T6Hzs%x$*q=zGZ30O1T zmjGNb${3wWY|KhF%&}<&p9A_RD4t?G37U7Me-!yZ? z7PsE{0>^LIVID5jhRj4!irYpnu@<*bB5YHG-OSGA0)FzCwaOq$UhLjK=IhyA!t!Ua z)b1fjO65v#1L$I_DA#BuZP9eep6w92t20_gqKjfGpP}~uEkQ%wf#;;#aA47Sz7RHeOHnQE*3nKzD-!C@yYgvJ}`J;9ULw#DYVafWN z_?4#EVm(332i94A%+MTM-sC`S&m;IGBOus2ef4(`y#Y2fAb4F+{9+F2M6*#@mkr%Rhc3-lS3XDN@7Jq(UfAJO_$FXVR-i?56J1k zOUi0HXOO?vvz2Qpgst!@}vW9yuJc*S%ARD*#+^i`6mi@j4f7}?*1pD%U=%N6W;r*UwbZkj^#C?Zp`q$$@|1WaPXI^}Tu5dND?m6m~ zz%nh&{=`3bmZk?G<3te*k(&^oy-1qXroXxQRp+6#;|0CJ(`*No-e(ZC1K(o2#<;$m z9UzdFS9j~hv1p8r`sSPlx+2Tq`pK{x)Wma%AbN7ICoL>kwswu?0a9z9Z(+uq@*cnj zO7*iC@Kgo$>m}$xf}8)xGvg7jip=BJPuo#VX}RXj3L%3P&o^VTA{)hch7wVEy3yPc zo|#-*IV9>nsGH75h!kl!(MFpROP7!gY978#6MzM`N0rsPf}B2r`;;?`jp_G;VZa|} zANSK)TQQkgCIBz~t2@dEk78a4{>vsS6r3Uz4mT%nDp z$*9o1G*9c6OQA-rzp5YRZ}#TGY^S#}4K?2h-229}s`S*| zLMdklv9Gqo6`e(rQ(%+Ql=ppuOeigQF44#19S4VR2KxA^gW|lo=AbaW#?TQrj094J zm<}X1;VfLxgx!zwr`FRQ!l{DpLVe>kXmmSf}dRDkg07q@qT*DO% z6WGwRB}0K_w%r$JwN#qNFE`BP8_9)B9Bt&g^YW94^ojV9RE+ZJg-g)K6dX|N%bH(5 zAIOg@LgI8hmDA0b(lfuKZz@Mwcs%SPl6Mz(F@y)@!6zXK_C95Up=SOj1JA@}SSsX= zo_JPDhM=E@$vD2T2b<+A#Y!kxA)hEPJ2shTUr8_?zFfccQs6g)0d z)k5<_^1W)k|JB|BTovnJM`HpF*yLW^T=R7kQEwmD(g05>oLc^K48kmZYArFzRWF() z>fcQ^iwdmA_30;z>YvAkZ_ys@*Jffdm1gRP-(Iv1m6nF{i<`KI03D56CrtIjQq#kq zmJ8Nqv=pJ7T6;5WMk;$?fvCzLhMz(&xjEAIluF2dA(B4KeNzc3WhcgH#8-_AjK}lu zx5nI=qT2+Cpz3X zyg14j{5hhLk2$5#bP2ih0?5-InoZ_PsSXHAYO)I>hv@XNLzuL|mrrvlWy22ihCmNA z8+>x;xfufT?BTAy(W-o*i1fl|821*7h{)8e4mnRLDVe1+8{^X8m-#!%Q3qj>KF}3S zEE^l((c_j#GRfgrKwI4O6ISn5UqQDHX8(zrb_?ta=hwmo<>|WM zJo73GtffOVjw?pjbl(12Ebvs^4z`&9BeBQvhW}@iYcP&rmAgwK7NH3pHe6T zEzzXXgkY|;2)vu!oQLd7rg*ctMi$*pcm9kbOw05j9=&jmc?EFgkAx`y_(IPmQq0lg zo2jUM?j%Q$QhT>ckpV@*2`SR z=_|Rs&Pc;8Mcdxe3U%0V;)ZNa_|Dz$0{f=2kiy(xMTTQJ(<$M@LVhR6cCG}+>b?7E zL{c0hu=jW-42E9`w;4LR+hn}wI$iY=_#p}2t_IBr7KHUBkW)qiNi?Hh@Yy4d6ij~z z>&i#RI#1v>$nx#ihD%EsFBO)y-k*5r;Q>9#OX$;uoBE>M zE9ws|=j1U2es;>XqRp0#v0T~DWC4wAAT!^s4Gw7*>e^Cz;hwz40X-l)44lYc0_nON zL6fgbKTkFLw*Hw#Nq9E-kj|Xy1qtjMp@zc`6>((LF3~Y$GKwxkk59c@Mj)?0Bt3rU zhJ!(UqwkNgj&6^IXH-z_kMTduhLs@o1#OV43nn2Y;I9@q0V$Mhw?uuw4GQWVO{F{3 zTjdL;18W(1)xq>MUQz>bTmkJsff4RTKWX;66%B_NsUybat^2pTqm@ zm6uJ1p+ONkDstkk0}Ga4sw$H3b}(sP@RyWYC$~4Kxp3bfaW>Ktof6q2zTuar8*Amw z{E1&+1`Lp)D^uSxI93h15mBEP|5Hrdq-B1UnAv^n>+-1xsz}%~$a0R2uWf~d^{?cy z7}K(9wJnK>CW5vKonwM`)5cyc-}*dY6mrC~dRjK93P2)3B1GvXAitBT?mMRmB$N2-TF8w9IDCIBXJOAR$IfW*=-VR**xW)p~^(Urr zUMmnmNH~;v^xq}+?owhMi1X2DPOcniA8TNR@5^v_RVFZUG*TuJ2nZqr`C2PqfnIFr zo*pqa-2ubl-&c~(ov7l9)^|@dmzw4sP!*=G5Ua}o#S)!ThTGcW-Rc%=;V&-$X@_nB za8p2i&>p+uk-TzH#HR%A!+p2cMFLgk#(H0}_T|bXA5#>>c7DGc zoyV&7kokq1pSBfKpQ2pf^^}Wz$-!Cbd*0s7)@PcnYUI={L$hMsV}d=(qaB6@C<{&C zM*A(!k7J`%+edwla6J08%v|L`lA%O|YkgBBT{5BtCA&H)B8K(znF2ftCjIdnmuj`t zDQ&12_emhtLY@eKY9-OhidA)|)1uN}Asdli#Ol^TJCM4pEPQva4nt{J(h&}O$vsqv z59?fgxqK>l@LkQ1iaed;@skEa;imaJQ4JLmiZxT9Gwnn}8k^eyGq+3dEULmp4W1k6 z*uA1~C%(c8C6*S^vDsp!zKdLAK2P^iKHSCuO!dS2sAG+gLhLtFf1G&QLhs>&?n#-F z7KSX4oK!Q$4ag4g>0>;;vungayFZ|-vFP9B<;9@!C@QJ*NxsCM_ z+>Fu3%BzhMlrWBd^qlGyc_a=GJJmf2bsM)dG)r5YMy#U!j2|K&Ynsf4-@STkc zGX@3CHzI}qeHMUfd-oiFl$-&KCfyf;k_M>K-8x?k+1|MHp{ydSYG$(?w!OAA%a|R6 zgc=i>%qN4MgS+s6Sikd5o_f}Sj;hFy`lG*)cW2y~jz6}jv6Te4&Ge*c<)S|Bi|bCG zH1_$;#Ql3oaJ~%s*TVsJ-Jt-3+{b-N=49_K+^H%lH1$P(dJ)0}bkj`Y2l*CXZ9lx| zT`?EGA##udM}H31ou_TNuk7iagJH0G273NYF3uBMv=m6D$q>bEGRJ-!C|ob?Jd$u* zSVXPKg~}F2ev_L26BYO1bAq*z1*024sn$1#FgZv$LRSaM@uu7Jjrgch;Fe;1A$DGH zsf<=2FePchnD9W$-#@DG`KC_49=K5-cvNB(jZl0cM+>N=V~qM^jpg zZc|-GGLlmD=3Cg=mg!kKO}S1d#l4nzqehpdos-)+`+J4vg2t8E(&~DH8NMeIPQoX< z&mlckPd{HKM&Wt!&Tf(UykKb{2k*mnGOyF$^zv{An|G_c0Lv!i5N^_B@t^%mvi<;T z(s)^ZjV*;zER{cZuuQK}>qCD+q|u4Meo6S*9$SGq#{DdZSNt#+61>6TospDh-;I02 zmX@@UnA@h-G#8bq(z#>@ct|O6c>kI`SPl>npLyWweC^Cy0vh3US=M@DDsiySIm3=X zkjl8(tS2lZ18ko>dma=;Y<{qZN1~>9p~?W9qm5`o87K96VK(T~52{|m z2bey|t0PPx%PsNSrDhXz8bR>q|I%Z$M|IT4wRB<_NU7dAVpfSSj3k`g!B_1>cwFJ; zj7@MkBd5O;RkbtvxN*s9)}|>-AYg%h);K#38@N+7$2a7L$z8}TUGknbFP4KJ?+4SCC7W(&Y*C3%AX4hAQhI!sW_Q5S* zy12R zTjL8~OXU+ceia5~V+KsDU8fo(@=h*u1`_wq29s{@CMj*3mcEB88M1kxq^7^hnfKA^ zjvP8F(ev+4KZanGnyvI2f(|(%Q`fu7xy!uc7hOhh$J(!EfOZnU$J;h>!|X~{&4Oz2 zA3Fv9vqMHU`|`!~IjVA5%0Ig~|Jzc*X-wRj8H!c0`G1}G`#BbFE|hHV+4Vqa(ILH_XKU{VV}uXEZI080DXG6w3bD z%kuB@vYdXm^HzxF|IyC--yL0{zaQyA(vu3cwk2)^Q4bW$I4v6(6G`AP9jJgW*@9<_ zaT5wzr50I&~O2ZtOVo4Yq{F)4EpPaLRFIfKla|jE3PK%!$pH7K!OJkZoz_k z2oRh=g1fuBG&Ce3I0Scx;O@}4Td>C6g1bxK!#m$Q^WB;6{snhfXZ7j@htqvdRqfif z_x?Tgyy5p-$i9Eyvv>xbIUq@64W#-*<<`;u<32YODX4slD)14InUsM1jDW|kG4%wX zyq*DaMx-9YIR9}QeXUP!Ow)MjrZiBPRs3adOBJYgn1|Q|%BQ|t1Qa4eroO_{K-vGs zwE6FH$i?n_=PpmNnUr?9;Z5Zi*WC>Q_|Vnt?=uW_8(wxGY=+Fw1Cf6+*o{x|r)WvlQf-a-N{b>u zwK!jEyM*6QHO4K0|6HixOHChT5Ha!~o?dBdztVTvlUUGO>&{R;nd6i6fNOI|Lgd z|Nhj2qeK%=iDD0#BO`Ni|_)cG6HaDTxn;dG-|Mzu;z+xsNEK~+uDtxtMow8O8p@=Hm*+g3lC z=IQt>ek3o~SV6!qQY)_c9`;&&`;e3++xk0;UgbC@8L#=GHbu5W^moWFAe3#+L)#n;Lim==_bD8;`l4r4hsRW)O1ng_ZVY8O<_evljq2Zrqmni-_uwo znRqhGEFuN|N`Z~T>}h*s>w3Du;B}B&=VOI0uUG4wyJ){>PO#9YX}OcNDeRXS$CsWy zA`4H6%;&w*j%>rIR$%>^e-YeuVBgZtl9vO34s@5$w zUDo5v3fTZ^jLW$YI|W5>RtkaNa`oMQa67aE5T=+HtzK4WQWV7gmYz1WYPFJ>>fn2l zhmC78s@}-`I}Rv=9)k_l+5Cvh`tx$DGEcERdLsq$#N-#t&c+u}exZ3xzcMVu_#HQX zl;}0JFLoEErsv>)Xv~JLeUtU6%l{rme61g;iHj_~n2Adgq_C@ydmm73*=-4{mYHHy zoqAxfb16X2=X?9(q7g%hRLFD4Wz=%TtiWXATm18X$kJq*pfW(bnmgCHwx5;owk&Gz zGFO#b>{zM_*IhYMn#HrjB53~5UbB+TN&xZfxLJMlOc^>iwMV7#iz^C22oUg`OyD9Z zyVWPRdbilky7VEf$wpX(WB3E7)?GmR`GZIb#|m^L+Vi`-c(76{<}oXVB+h~Zz{ z_yZ`IS0yTiSXF45M)IMet2q^}lQwb;vo`2uW!@1|v*wRMICKxS-{ZrU-qw&PqwkXI zazoFI2bKAH!%0JoGc%HU#2l4Db5ByC*zM(N%+f(!Vgl^FHBDuPKY0|`|1QS{uQ7^Z zWc*H>BIK4S8L?O;4}fXbSP%(bMC__g=2#dlpVB5|s`4h`E9_0%o|kyPdluHyJVyBb zb&R@2h}BKDgOBBO0iJHRrc`XKjcF*s-R0q#Si+x+5sUe`cWF_8)QO2Ocb0SOrahLc z4XAMU-bE2sO^x}vNZWe61A7-)-NU7PRY37HwMDSv#t_L(!$DVZ2?xb?V(!%DUJQ`1 z0$tMG!@RL^qk!vo3|ji^1Prv~hCK1E2K~+dF;>^~90;aaMogMqkLC_gNT3;`LU+zr z;LtQ5&+7{Bj-p|g0CuL|?3U;DJR=GiVYya<%H*QE6ak5(uK!ilKRZOTA^2`k?Izgc43ORbrJ=7>ti08UK6|*hh zy#zI#YX^uPEkFhdqT+4Pci&Afn#hS2Kh=R5(J}M^vjCO$Ub-^3??{I}JM33`yw~~e z)*w`;bK~{%%VK2QAB8$~aaev9N?U(o;w!VJflSZpvr%a3)SQd;o|fs9fX`KhQE8F3 z7mv-xRQ$Gi3$AYZ_P_N@R9p%8Lj?SqY1PIP7oZY+RV(jt-79S|G&uNj%oSD2{WR*~) z^bn5Q97VGffdU$%+VcWDU5{p`uzr@b*YViS7`x~^rZ%~p-_};xgCz#p8gC#}H&;8#IFEa!{#o~0T(BXn zQnoVbUH6ZC>ytPlAjX3x;QFR2!GKY4Nfd*cjb+ zYkkZ<05E}yH+bFO0$-3DPFa1fGm?s!#TaeDb;Du&Qfln$2yPhB5zty}blC^6iCQg| zH4po}t<^Lk@jW`MbwoDvH5GT{_W^nvPqf%tpbkpo{(Ew16|r5zlI#AAg5foE%iH-h$jBSBJXs>8=5>2R_k^&Wt^B1`RvOl5nG;6B)p2;c zC3HE4l5U)=_e$@bYe^X(zI9P)WB$0Q9g=Cg*fghg)M`4d=TkMD#zmJ9dv261`J%)q z9}{%5J5^+Y0rw-4uI02G| z|8tdmxvC2Ch`In2tXm1$__rnz!k6VP4lKHBxli!FWIU%vP~t~!yw6J>!*r?-5T8*V zs`m5-r>)s^(3Rg2DAz=`0pgDJcmY-~y0Jk11ajU&$Hs*Gan!2tHaRcu+iO-kp|o>m?A`kl%JnO9jx?H4yrx;AP@ujqSH7>a&NZ-9_5lMm@=> zYv7cFxs;sC<(hi|a_%P>W#$d~t+VCCqHWX3;AL3ZlEtFHWi48|!f6p>{mC}$&ZyRU z#A0GD4YVqq++yFzqQSL{&zx*j)_PW{<9vcV8+WI_-&AXVL*jKDg|eSJon9ruv^6(A zg>mm0A^cc=WE*FLd}Ua_H-5h5Rd#z3b2W4`v0t$LXk^KNLyb%xQ}#G1VL3#1RPRmD zh%eh2AR$K%IRahhl`XHup2@WaFWj{>9UQ^#i}VytHfSz(lJTtPo7~dF$Xo&*&MIIt zkz$=bSInCe8DbCCvlf0^Ft2k~Zj&A)m7BRZtlm~LH}5^Gs7;7#Z#;07TdR$Wc_ZB` zmx*gdn8$9qYj-TwgJ8`&^jf#B9h;bjyca~=GptlREopVeaXg9aZiTEWZ;j|c=G%I_ zY|0A9A9Us(FR!WcOltJ1A=;B1%=E0yOFA&8eED>t8)paxfVNEpsO66oec^wVO@Zmb z943{o1<3_mhkQ#%k~x=MR$1(JJ&Q-%*OGczeWrBYK@3L~VO1?cSIgZloU?C{D$$xx z4WVv>tUjAi>xqlC=0~50>Q<4{2$C~&%lFG<4<$zph>P`^#6bHCBs5GfCBt$SVeNJM zP6WSo8ckHeoAOU4pc3g15PWLI>)OeNPhW%^jSxKlC3L@%4GKC=5KYQIG-$ssRu$Xj zh~qMiR7Hh`3<%H5x}5OWnYoF`muTRg2PwRE+A4Z0`l&?o%iz$xVL8u3oth&QV8H`i5aJ9!asp8K*#GxjkLGJ z`}rcb--z9QNm8TcBVQDSDa-TVtCY-f>G%zWo%--2&uamh5^`RQ9=P#Jnb(B;>a^Bb z;ErT&D?`}hk&){*soQ)ndHBU(Q8;;o@3J#9k~BcvnUgIOB4=oli5&o4mifY%npX3y z0wf-cx@GX>#h4F9(_yVCca-xT%}=2enQ}jnAZm}}gokST<=H4X_RE`xht`VBc&y9q z2}BgknPu2*z0daD)eN*`en0Cm{&w@6?+zGj(Xi(XUka<6;M*MQ7L1}s7e1OE27%~_ zI4?=9=Bk72N(x66ksmJB-%s17K5P#ITGIU%x1O2dy2U?Dp36?#Zr=Dji=F|Co5HMn z^`rgSw${yxy^7_Sq;q$p?;b>Rj*xDj-)u+2%1d>c5`-}>_m_;yAgA9SR(X7bKN~Ga z>EwyLLZFdY(Tflk{9rmIILZM-*irx=FXL;pDn& z=w^F;_~@cm_E?@CDdB~pI(M{$P$KtSM&+AZj?-Tib zP@=Sd5P-1Xj#nx-inEG654`i;3(&1ly(_b-z(w*+^IRi>Sq+u&c+cXqXzvrW9)2Io zixc#HoCvsd;W>7S{I0`4ezYzCam?4@^RC_z*OP{}Kl*NVPy2VT3tI}n9AVjj{y0r<(Zy$!wY`jAFZgUC~5w9lG^M;B8vdfewmfvMg351 zQwR!;4?E>wL4Inwj1ab54k8VmUsI&k=Pqh@7}Yga&>N523Q-Ys`ueh*QSO=?o2ka! zCiwQ3Fry#B;;_?W#60%htcmE+$9dSFjAY^k(BQPUu8}aP*Yk3Bl|ZM;Gpuz+MHV?W z+d(`=QqQ;MVV9@%I2{W}1=qEBU0#!0pNqdgrus&9K@|uIjBzDMXe9Et%{iVbTRs%- z_1!#M6+x28^7GF=+aA6W$zmvfox|#TAp5w(sVH;>n-##|EePL;AQ(glc}A!1-_#_4 zzek`JMUbWtMUF&q9}NB3MKs#m2-f(9{kCFWkuZdk ziT7EzQieZyq$-eFHsYE3f_f;E_6R$_7QaQrRlKDje*sg6Jg#Z^yyCr$U*0SOYBvco zkapd-NlOJQ;H`-0YXJ~p20p#`q1TKHnW7-24yjDR-LvOo&f6oY6Ru&TLb?+x1<%s> z`Az!QOvWK!=w5serK40ZL5iVY?^TElA$YrsGd@E4>eg7`?6l@#%j&4lFk9c>uYJ+S z^Vj_RoIKMxL&@13fCvBcwzx?5anzR!d--R#Z0_4ANIcZR0h0}iD~AO{-r8Z8}7avkU!q!b})v9T>KS4#wBJ*>ZA6jv!+j$YN-r| ztaAK_UDNg1sX^ND@I(3t94oZ&>Q#?d!|}+6(>VFzV>m^srDJGh#fbW;bFI^&7Som4 zlcjTQ7j11@$3sqvJJhyg{@uHIVb1Qy4!e^K9)F@Jxf3BGSNEK|w*G+>ZY~fl0)ju2 z4cWp8el9PW)dZ>!i@}CALn3F+AIcu~swz}PJBK>cOJQY~tPOrYGz94oykKeuoJGmnG3_oA#a*zG|<3vaOqj0M=S`E%Sm6L@!D*9k!;JajdghBs(HW zD_%ok(w^|g^X9{B+=hQq&Ue0wg4E8vs`|_-Un<@G1EXpoeZ+I0p?a-Y=LW;oAauiH z-fqoleTEyQelzmkh&r~dx(dq@Bo%%7-{qbOE(CXrDNY3Rk-J>Kh>wnY$w9O*&mHo@ z``fi-MPwX|u1Z2%+=zrorKi5&ht!PIX7+Z0g`#}7t23m1%K$~2k3y-VOxuC8_8A%3 z{rRAqCnhVGRky-D&ct0Nq2DRzZWyjPV0fFu2V5W z+5wW|-cgyNg0@sy5i8;{#g*>f`rZM;(e0J!6t2DDdXZ9`@E7+hUB2fvGuOPAle3%^TZgXX zM9VSqR98fZjMU5t;6kTBl5S-3VaT0IZo5}qOJY>Tb}R9ln(1LwEJ%teaeb8 zT2y_h+u}(@v?1)a&y=2j`nys6Dv{eY3>E{fNf7T|K&4fUQ-~A7n9Q8&9dCjcC$Hoj zmVKbFIhUld1jfduH@Yg+d2~`F@A-e+U7dZo^RR=It#J4sDUB}cR6vWaF{{|&qVS8Y_tkg&Po){Xv9VVQ@rlwX@)qY}KTS9ou&Eln!EMc9M22IA|!VRKqa zK^||C^3^Z}9j$}OivQ#fk+8C8(rEudY~P}UIxcTn_^$K$PNZ{SNu@eYeSMV?`7{h2 zaR8{l;^D%C0my!}^2DcnjR5IT|1v>G654}QS!$?+>LapiB_H7~3P!EIbLWR|A5h30 zj2XAt#(=1k`MiOC!9f2uoLt*i&yJ%l{djk}_g3O%j3gK16uVeb(+&(KL@WYVwfBac z7-O5&G1X?GS-iQK7;3RQK&;5&9pp0M@A3%^=YE{w=5$-7h;+L`w5}p+Iw7dF=PXFL z9OmcEzveUyK;u?515IWR;+b$A6~4W$eYuU2f{n#cgQOU2O?pshIMO&X)w|DLT_WEE zeZ3B!*B(0vjqD|R43WKY8EtvE4p{D36F>uQ*qy?DQP8`M8UtFOP;s)U|H5kBedRuy z7Oq;QSYjBist+6sP%-ul(bZSS894JtRhdSECVDMX$WxNJIHAh<$rY02_!;iK=sy`v zD}zyR68sD~f+dKGGe}4R+I|}JoNF}t45{O^`5?nchuw(Rj^)$2RTf*g;PpOI$f3>H zzxc6?$irN(`Bt}jQo`tA?xIo1X5jfW;q6(rVv)(pm7Z6)TMz~tG~MISq8)LLano|3 zkKFf@#6-5@eSd7-Vgk3zciY;CDvmD9HT7ECdF6<^JE4-Gf6UJPAN|lNcoMEF%PYLg z=i*4XG(Z?;6p-O;Z?WsCMv&NQ zT-)eF`5<72IWc&Qiva(fQc>iLz`|EbYq5uGVmMei+AK64jkZ?o@xHxDolhsK+%Wb+ zzEr8AxlIt?!}o4NO*5^Gz)Uz=?zm~!p;YzSK*;BkI$k!pOi(a&&~9k$I-NVj~Z^(cJwI(DDP__DQY*kAfmrlQtS#BZ4=FJBV7rI2<)fR^QO<1|NZ{n zFM->naVS*l({uDUNLV>f{T&VsNeT|jXa)0G_rNtnYtj64@y*0LP#|YM7L4aTi@|De z*x9vkR|lGUqSNli&PdK~c<2$Ox9WNmPWll}ul%?=h5dsUi zo6J+chNDR|fW`RqYk!j3$y&E@e<3o*S{0UDYF;5nzvk*(wKpBup4tz^#@#(w37!x~ zg;pUXa23($1(H=`T1(?#^(@Y2h@D7mneX*N*L0(}E0LR$-Otf31MA6puztnMP=Pyv zh0@5qX4|A2ZNK+CWg0B5owZ#2TLf>Dv(qt}s zZCo$(Zbw^>Q;L@xHEZ`Z%kmmO2IN=8F~mp$`|IiNcuwLmb%;W41);B=-wf$-$d0b| zzIW_TF zq6kZ93ubGB-Hc!ktNR+m?vMe+<%n^NVgcyx7psj`%7|Sv7S#>Qg`7-%qAN8$Z+RBy zMW?`623p?)zcos%g;IfRgTw=gcyfy=3n2Y)SVAAn%K?JiNB;Gx1h9)>{S*9pH-iN+a0rU0`f;a@{#fA3zJS~Q zcttpS`qceWZ%UTReCc8T`oa8Ysyj-9_+M^1wLK@Q^efH*^LVX6@}h-`11C}*@`Xcx zCn(L(LrU2LA1yYCYM$rGl<$tphr10-+FjIz`w`+!BoY2pgw}QN1InQ9vbzQ%nO2p( z`ZJ&V)%-Q>Ji!@vfpX(U=aVeKA?VbRS>m@HM1jaBFNp}6Q`)YTDijaO`aO(_wAKb2ooPZms8O)QdBB1+^> z-ka`wQSrG@dCbHX>)UHcJ-HVz!~)j(c#@^5PpMC_8*IWy*|$xF^P{Scow5hroYyST z9HCeC{KC994JNUc&y5zrI6gaAx)wN=VyI-CNlz^xj#N#Q<0$+3!YFQrr$u( z2?ug9OAg;H%f58nE0NH~FFL$&NJr|3g#(4mEoY|@w$VH{!o~WWos~5yF_}2Rmv-Rb zttxolmfaWvwDS-mZ&M>PW}{xf=#OxlCU??&^mas(3SWC9sDZ_Ff~qJ3|-+$ z81?o~cuqf#-k$SMb!FXyI(M}V_J{$z6B>I&LQ#DL^?W)!MLG?zU+1x;+C>?1_E_)s z_mV_M+DXykM7^#KCrlO)8+ue;SEDf{AQuXr#suaYb<5&TlaGRtoW2*S*Z#6Dol+Ov zjGyaWc?^_BnL{MtQRfpo#`Sn;d~s43s4jEe9#OA^`-%10Tua0A(1Wx}P>{KvQY>*G-4;B?x&v7I!}^gZ0n{I+v_;&+r!>|bWD z^`d-8JC%le3o4nbe%CH$#|7VPIN;3G>Jrtey4;B zQ7rEAILde&ul2F&Pv$Ap^a{UTQNhXov&AS$l-F*J`q!CHCkJB#RZzkDrj*nxsclF& zgw=+d=wvt)+#dvR1drFgg^-P&QT%=KUz61TOjYkGoND_>U})5o6Tol2es7Cdj)OL7tJ#07Y5)03!HlK^j~ll*BKfqS`;X%*4m_h@ zM?u}J>i;zMKbri1UI9BOimasWDnKm%e|#`CwgS>GIQ0A_Q>~}A{g0#LCmf!r!R&$c zyZ_^ZBSm>%W+2d~cqZG>{N2(1_h+NZB19R?T5>1-ePIe~v=1LXe2in(pms9;`oVau zs=U1A<;?MCDr_ez$x^A_z8b4(YFb*_oQoBAECyXu1q2XU&W8Jy77;Brqn|s@KX3l) z3vlslrHrw0j)GvB4*J6er*&IpCN(w^JiQ5rlxf_r{ZFq*Nm+pX8D6aIwnaNOql^SI zNK<@9QnqXkEPNzlS(fw2LSO1L4>R=o*Il=zihlmg-CzK^0BQrfA8@aLXQCV=3XrC$ zRcNCIu&11r^E9g9(XUSE`o65N#Za~-u6t(X1#=|-NQw!QKjNDrMZv;K5T z2ltJ+hqlMu5v|wDG0Iq&IouyBf(1Jt8 z8pF-g@fs;Q2EBrmJ#|B#U9MuPT$$y#nZfAI#-DZZk-~_csMmQs1sY9B86xi)Mjlz! zGD9Z|wW$8AIc^Ow3~O@7k+n2E^ReO8_2Z=gV&U=4H?}7;-rI!d@lc~@(!2>&W|%up zc8Uy|_fYB$$FuRSB^4eg?1qxh(iSs0X=E~CCn@;Cy(xkSvv1XZ`7IfNb4-$cWBc9k zEkMj~a9%@(Vv&Bzz!~8f;TmxjbeyDp_HRx@dM``C*Xw0&mBj9(c$;tiY~3Gi*hl;* zN=*EAa3%0Cww6?Dhd^0|3rCT;(s($l8;?FgPqW&Bk>IU%3{_w_(wA?=v=QLE7j^hd zYmKGvb1Rj*DvtQ0{E4p}M*(7brFVP2eof6Uj!hZap!s0r&&70)VC_HqFhiPPnc34^ z#0F;Fq{bJP(hb2T#+|prC*e-LoZz8EPT8Aa6Pox>DGecagy*r)hq;ZJQEL#Y85ert zM;tmzKI)3An5zAbI{csL4#B^bBDR0l5loM=Z-1p=@eb$nK&teST@v(+$n6pTCb~<2 zX_3(^QLcdwn438oKlb2?q|dAFii~2qiU^r>k=pA*=Ul_?G;FBV*@{$EA7lVX;l^)p zXQlC~BZH%rfyG>9REi)-8CtA;^pU7>cr3f8fnzjYq|A0Sy@mb{d1-=q+Z7Q`bv1R9 z+u_T5uUC^d&#e8THQvXTm08lpwN}A=HjnleY$SskjP~BnLQDe7j4dv;Ezje52oH_e z7#Rn&#v@5Y+zy+)VN<^%H&RIwZ(oo0M~Q&C)}2D%BS_QuO+m1a6M-mWq$`?s1mvwo zW%6=~^mF8{lQh@61THFgS&D|9rs0SqFwRGCYIpi9kR&BBPMTn+uyB9*@xo zHAF?24&)})>>Da@$`M=l{Mjlh%;$EQVy)h!0g&->ERK~6ERhRzs|p)qQh0FcOOopb z3pcISQ_DUT6)ya|!sqkZ#hR_K$(L=PY*8baZSqPd5ptB%thLFTeqP@iQLIs+P)JOW z!^Je+f6GOzX)kCc{(uwFiC(q&6!hQEuEnK-fd2 zAKfDQHudd{fI7Ltbk|qUuEY*`y`$E5S!}j$+mGXZtBO2~MhRc2)uL0C!EPaS<6LvVHA!J|d%O9~%X~1;#<*Yl~~80Pa>jQw_`g(7)y{%n#cC%B)lWzLyo(p=7=;GHMw{t0t!}OJ947 zwVS(bs*qYV+eU+N_*n#W79zEoUvd_^FF72sDNkIRz!Be6 zxY^9gs@G^Otav$CNyDV$V~NwTTcF!HcREqmIwyXc2t7Cy*sO~(8aA8vXo+W&48~_I zw7A@#BepA^fjE(mZia5Y)_(c&x6@cNO{LSFZ3|hPV3^!;Tjh%@e0Q6rdikVU^Iz-O zKE}EBQSY}nGa3#tU(e}{q&HEtKE9K@QaN>cj%+BWfq5<1y-g@wBooiFS*Vh+K*b@| zm#QQ1k?c%E@bRjDtLijMj}~k%PO4RHn|0_C@;&!9Yn)QOn){T z*c3aW1XBn`*#d0*V!ftMJ@*`9t_L$zN*$AI14O8_`3b+o(crDZV+W@{9(ZznDahI$ z?jqREeBOQmKw7yrr@rLA9NF$m)TvCOTk4HSjhSLHjFn9IV>?@|L+3Y=z?NWd_@-rE zZeRhZsUY$+pIyV_{{%6n^^csFf1Uws{yb1W7HvEaCFO4~QEEs@>-_o(9f@9>HaxK~ zxYUsLvQMXlf6wEJV6w)w<@FYtPmCOqh4vYPFIO(f}&bv^t!*?(MuRV!2UXe}b{3a|&2q`o2-&muM z(+jd6(lkgeMcO4+6>%BVj%ts7;@eG!O5a}6nxC1n zzGF`qK#wt1y{|a!68Q}byhY^D4CVx3i#$0p5&2+zmN!a`^%%Rsxu4r@{R;$;WKg?U z{rxwu(tDG=TF)VvrY*~ut)@u&HF|;pXllc_twN2=(_Nf2Ur>uh8tl8wvWKyaqhaCq!DzX{)vl}pb zv&n=iJP&;_1tAq-C!thyuYB~kt);+9pQuB;t!HB(xF9rQW-fWt)vfx^<3DvGoBIw=e#e8w@U|0upK2gY2{4UcE6pOyM{9@SId# zYNkI)p5JZDd$!giP`Ks$CK|I!;nc6<{GzGiHz;9Wp5wkQmz!;{sZ+2tz(WF0Ar^b# zkLu-3i!*STbU4@4jv+a)9)}2;rMZjiZ^F@ZCjqo?(hddsLA4*LxOUw5aEGUX70muA zH|xc^cIn<7YpD1N?);(rD?xWQb@7wc<>T!+1|y-!nmls)1~u>|gtRkhNcajWVU;_5 ztJzxx*C3+1y$>ZufS1$svy;PU>^|4>V1xh9jOc?wxBYP&Wukx+SfMmL;kJY;`XfF0 z)=)xOflZozv)Qz;%R~S&d<<#T8Z#rwEF&yY5+05z*QSS*aZX*fjRV6!fu#IUjOY*n zvmBHp2x^RL^)34ZO?2h!CTCK-M=YtUB55KeB`rzTkh{+@k_sQ3S*9&K{hd~KWHy=- zO}H+J5<+**Mf+WeM63c)_8on*>RT-;t5Qfx**iAvY*O zjklv(U#4%SXvYSAduL{3TLO~`B*qlc1de&+_foKAVvs{IKIc~2XNmr@=<_sYBDeJQ zH;PxrG}#|4^ot$r%M7(WUk7OFFJQ!&mj8M!;#s-R6X!@cJwZ3h5y!ZVEQ75bF^Ztx zE0*y?j34#TgdP!F1^oa+QZU>gd*SO0#pXn4)^X&goOVML898f|Jo$iE}*MN+oZj{r3!L&@?0??6>yV-p}|4FZw7-eILT) z6-{n#Nie>OXDnp?k|d#B;3VuN6nRWQ`q=}0nI7w2Z@=4R@dfn8HKykE`J^ow)&E)- zp|T=WK;&1aOw&ugy+W{MNVskzeO9XL5mwo@!;jScLxt_6lbTp@c=hL37z!EDAGxgg zYsv_x`MR$3W3hJZA0#|+Yzav1bEu^n_TV`S+KzYpfAag&? zAX{-Xq<9oJa8?h4Jyl0no&Ku7>8Y2xqQR?u*|r7rITRDT;+SHJR!*~##x&!ARnv9u zE!=Z(6YrUyAx5-Do8Qg9iXFXadw`Od7}6aUOrIGEFvL&#>F*-MkfT)#mdkBDYy!hTQF|AYODb|f_XcaKPM z67IpFXwSB@PM1!YS#B79cQT{5Y^%JUL!FfM8JjhUPlr85;G?6o(Kqp}L(olh3?U zIu;cRxHTk3XQC%q!TjcBe^~r23HQOjVih-3s6Nxc-?n=~2n`vxm?qMU0>0WIy{!Y` zEw_@lCO8XCh60s&Dx*3;0(+sIttYcB0E6sWoYLg3P50<%aVa`&JeAGG-w-R9W$At6 zy!=XUpK!7M?Fe9;k1dBKbA8?;&o>+)h!(A1)%{8#`SUPld5%$>Xp^lTdP2?{bJjGu&Lo5rx!uG@O#0C7OKsN{YK!`U162Oh)q^Xte|t zsTv`%f`mB5W&jUufBnca+$94945NOpV_uuJZqZ0gnZ8x~2GN07zoDau7^}ZVrZy>z7JcIMjP`8|^PGj`a5BmNHho#<< z0l`+EUvWaPwUvIkfnp+Ig;PxC@gDy7!Qn{H?XS;*MHDgcgOnf7pz<}3*Uay=i`Dyc z-fElcO{b;ixCa>y;0!Pmle4@vXtUD?p9IF@wG-S>V8(_?Y%);Qm%ktQ!fdo~V90_e zl7_f0q62O!!f(dr)4FIdqDJIri>YQ_OW7C3jDz4tFIfykR@#yJ3Xaml64I@JI z-lL=A7We;(n~C01t@u7me4LB<^SYD9Xf1)^wreSA+~|~h;-xok;wTT;H8}~5D+cM> z0n>`CNls7|anQOLfvvyW@_hntTNWx4su=bLzM|>Ndb|NDb@*bg-jt#r1ra~;6t4aA zZ_DL6sL8;pEof6?wHV1f#C{ z_KkOG^=8%z{w~ruF$C->zunT1NU+885)*$JA~V{HlJhTG{?&IFE+iMo)E%Ao#}$e~ zxv7>y=^W~8#rWwj%ClZEtJ)vDsAjs7g?#Q zD#LCwU5W{RorB&g|Bo6n@EujpOI$J4->ih4!=c~8TIg1F1N?sL9!dB5{J2&}wG8({ z8+h$jaYbRvvLKQ@Y^k9n{5}W+982I&PSq2KM~}kBPH{Lp&x%wGtsU2nuGd=&jbfu|KF z$)<(sk6UNtM$vr4HsQQ(PtL^V^(~3`E&F%G+gV3H<-Y!Qa<@UWb<``Vh$WpbXkWh} zv?2Md82j^nmQUhw{xU_*(?TGxdl1%vB6dz$fAo@#36WRu5=Cnt++;x&31ZpV}8$F!eNG<#_N@N|69ZZO;d%f@R@HSkLh9v=X31=mGV9%L@aE^w zSIpw-WcrSmFO6j!OO_XdgZ zRw*&q=NK`(guR(@IZH70+~-V~&x~R>%jDY^;va93E3#JRHl-Q1%E(5})LovlyByE@ zXG{`mp@uKbFu#CZ^|nD_c6uti@Q zQ_12-IlfDvMoZL_CNjl{tA*oZnCArY5QdAKvp0i?eIo0oBE>dN_I>*X0u*6SPLSC5 z;%vOy57WpHU+ysh`$i=O>9c)gkiQA)AhRG*T`A3DFh-7id*%DYcIZ45CwjDt(LPej zuba~8y|0q0@}Vz%a^@re;Ss2m;YQnnoKLeV(Ncf83HZ*%QN(!J zQkSoJoC#7QR4vezW&xL{FT8q=7;{iY-rgR6isFH!MOzA9IfD`h=+|e06gCxq9hS{# z1|BihD+8T}Ll|t9<|Pv=lJ0w&JNZjn&GJhDYv=NvQpCf-k)jvUaTx(Db0B@JH8jeT zi&!>!i*y5)_K=_Fb302Xq#K>!C}`Rh@RT802?SCPAh_I`3B011>HH?{o{ySEdYYwJ zYsi$fQ7+T6gD&z`F6%x7jbvcM&3rrG3;r7{oqmRN&N#n7yr;nP*q4Q8iEe_@sRj!f z^h;>e-?Vra^_I|{flSAOJvhADoo}{VVTw>&O-du$o<_y_b=HP%SnJsuUG5`o zx^y3|_m#;M4+;g-8KuA$bdsZX^Hra=n3E`lyjhkvlxv|ZT=F=fRr->Vd88|t*869nl9O}V_WrDET#-){GLTKl4Y1vml7?-|~{h-1=aGjmXiV;3_fR~iTQ zz!}(Zn7G#@M~=~Wg~P&8A&?@QO~>98sTUsfJB8>>UJe&lRPwEvW2e}!+Icyb)48}b zplx$7elvL#G^xMx10Gqnd4^&zo)jQPaSC~d=StWz%*!&Uy;kk`+P&_B_53qI1h4~- z)#{}?%%f!epn7?AvZ5+3EKBbX8)>C1Y zppjV0UWHqEHMqrcP`2hbC{Wa>>ve{wBZ2ew4S?7CSx`kM4bMs0V!!G?l-wjb5W~O{ z_L?V>$SMD0Qyl4(=ZIFKUCWiHKS#|^Zy@ujG+ol<=^QIX=BvKse!)Tz)%$o)SEHa) z$;J)AV*YZKr0#jBT7GwRKtS)n4qY5woVu}%mg1g9!0T&vn2^-$xwH|`HsRxWr4oMT zY5P|#WyVvIkDehKb0B<}xbB&Q#3`MS;~(>nYBT70J*XQYenmSH*!l$U9t`a`A7!s} zj%&8@EP~*)Q?cI>wV%CQAsPsqj5$B$i-XqZnx^M|m|gFYiv+^r181WU>@wbZ@VnA^ z?MAmlnu{6c?+zf+Hzb0Puw0JFflcap)*7JlzvtKq?<~p z@jMP9bQHy-=cR?mo!Zo0%6~Jk$69y_ggkC`T{(<@i;p*Ic-FX(-V3jz=kfl?7=QS> zP8{Mz6kwTwGBM1DAjYNLoXd^Q2+XNrpqkeZt6>R8E1U_@Hd6Els&UK-*&D$fZY@T? zahvIiwSOi(GbxS-Ldq=49j}^2h)x>zuvIJOjc|Ffk?bXo4>}~!6ZI#yH(Tlg_Z;p{ z+cHoN@*sAz&0o$~zwEjaJ&t)HeEUt14x+)6Rf}yE!Yi670N5>gscf!U6}J|DZqwO) z^2qFd{{ktQ^c2CleWVCNf{LVC}nnYm}*ia7r%XVf<^P-~E(875fg zwSzQQ0)q|4P_fY9R3Lz=>NMYacjTj_hO#?(gGnT^=@xc$))gJmFpkUk|FHL#L0PWf+b|$VNq2{I zOA1JdA}QV7Al=;(($Xa%-QA7S-Ce>>cilAa#ok-Czkkg8;hlHp`S8p*&Ltz)b;df^ zx#C#IQjn=#?K@xWDb;v*$$ujlvB_n#zv7E7nOHJVF9h|4qRhURwZ8UHr+Lmp&Lmdg z^%r*0un#Cz~!PRnEYgrd&&cWp6 zBY<{?YHGKnrK^v5wJ)nzHe4xynIduknsv-s+K7Fdhqntte0s6F?_=2`KG0HbLEwzPK2sjH9s2B7XUHcc`<->u z$LyEOL84dBEE}MwAU26%HHndwIbK-?N*} z!s+=ROF^{}@S7Erh@kUYEg6nnD{uEippS8^IRMd$__#zze7&f8vaE)FFu54$K}jqtKM*M zk$R($?fL*2c+snrFRSnz^a@)rnbXpiNO;6Jw`~VjXt7kQnX1|CESkG=gGYL|<}^vL zUn~+HSS$q89-1{i7@6!4a#qk6x~(f&<%T?dda-|_4lVHOeX-*|H|ke-zj8MMsIkhX zQ>=VV4o`1&tL|l@`#5_JA$=0U+aMhf6OAMP!!?FOzgn$Pv-y0D=1!tvmPBmJqT!u< zhJs@CbG1s-?B@Ym70!v~^9`B@dts;+L4ri%`?U`)N z221z;n!&82Vt*)0;la7dEFjAcnG23%QP}gfh^W2JzBp!Did;UXzM+}|((9$`O4ft+~6 zoBtV1A)>A-SjZ1{D_C3RqzR*z2kn=y{ikWgXOq~BY6{O^>@fqBXJ|BW2LvqdVg>T5 z88K`^%oF{?PPV)bJ=4!c)%u+|;4i6k7@v(LgCZ=j;_SLv4^e6KzO7+)1LY93D2or& zl{@=3y=elic)2j^m&X_KqzA><&YT?@r=bnOPQIO9BpnMneS}xjYEQ?f(FXv z%YuR=U0ql}Gbg~wpp_zRjsVAO#47b?!pNxYC$g)Q0-5i4rG2#?6z#AR+@DO8g$r3O z!n7)N-k~>)E%}z>Q)Kyue3vsiiCktG=8H{y=|w;{pifW%sj-Rj7)fZbF+v*OWy%q2 z_}VA96-@aS$N6p?qdnYEdV(nn-zu7mp1en$YjkyptFOfFxo?IQmJ|78xRakISIES` z<$(^@MNV3;2B%z>w>Jyg`kKjK`OZB37iDliQ2FMOgd4RGIJ3-8hUk&2Q zx695nSkN|fa=quJ3JtEC3LXn?yB`5`P;R7P9lB^(Ietmew|yg26Y4O7rLI20A@7LZ z6pvuzKxzV6*<+6s`6hH57^b9F_Uhn|V9>Q*{OEzI*Ei61Dly-ul-B;Sb(e$$FRPf! zR0@SV`~vsI`IY>%g>maN=rMdq$>+}!Q9JN^(oSG4PIGUCfFt@&?GY7lf(B<$)EF-U zir_^%0CZ;-G=R@ppfMi)s;O3GN!}T%%>>=gTcXvXRE7^rVOCQ#)}*LUD_c?b&gcs} zRR*=eb_do5{PHWlWgs!m?QxCLt4*4Q$)F&3cQ#$ZY6DSo5L4q+74^L{*kq)Ok)XCW z%D?|SDL`tMAbQN**7|`gUoEp@oxL34BOx!X1WswBFf6heep3viw%Vu3;tZEH34QQT za}sP^Ow%LYV?4vZNWEF;6Rt9uWWxPx4-q4&WO{1pl(9GF^DE?0+;&FXu9pc635_b_ zhHU$fx$)U5-9lBRnyRVQ6A*oc$rAc(y+%y!9h{ z|K|NF&yukq%50@s8iRV192BkBT~e4_h8*{)FgU#i;F+Y7D;U^MBDvN(Zpt?DkCD(z zl)}eploR&xy~?(!d@h$|q6>C@6{#>rSo+%3EI~-*d2IA^&m08wyo3rv~5)@L`{UPYMjp42BO>?Dm6# z;0$GQG-!K`VxsGExr!FEHg(bLoE{nt2ADCt z={HVSjNG`Ah7${}xqXM*@Z>i{d*_2Cnggn<(~j`Ghly|M;opBP^*rMUxb?WG6SVgG zjqdszJl00-GcZeTciU|o%)$eoEym2IJD?XsCV|ZmJt%@Fmt#(=*k{g;VJ*q&20kpo zXs_>bfZfb+HUYwVgfm_vl5x~rqBfOgYYYP!)GOr3**=IDyK-dd#Uj|=ZrMa1 z+P06ks5unwzPp?Gx>hhlYtdIvX+^C1+Ubi9kZY>r(q%O~WweSR2QUO9?q6Z4I$lTg z{sQg^`zYlhNx6(SYOkI0WgGTl=fmXSFdQU5j@B%J?c;lTL{$y?X38w1=`we?3h3vh zG|S%^0!eX>G@#YQE(KR!=%)E}G*Gl8!BFc+Fya9C==?h0=q+L@hnZq@O1)iD@0F*aqr{SW70L@Sz zs9TK79FNr$`L|m?Wu$1&!o)n|KFrlhiy_CEiy1K8B)wQ=Tv+Vrud99q$dT_-@|v%H zdCSUWkc@2B@>~cZge@v% zRXEuDXlTZeaSh+;0rF8IL%O0B&Ps10SRZ)fesSjFI_ywa85YCZmxd*w)Ni{M<$qHv z79t`dKw=XMYYS&dRUL0W{M7at>P> z%zk~kH+xzT`CgNbz;I^lJv}r+83C7`!YVW+L01WA(_&$kb-&SYGmJ5&sW!evYAEq! zv{WWtp+rd*%K$gGGxVfJA#r}-aDi&@VCLs2U6Ev#J;xnyBkDM@x?nvw?bD)$I}rl zT_f$!0)NU2n0C37`9Q$y;5im`VOX*PHEoh@+j}SXWf2|4+vZk0l#zt`9NZ_OxtA&y z=%UKdua)F;8-Z%9736;0%e9VRP=f4~AeD4N3C4O#y{#zm+A<=&_t)k7o!~72j}wM% zfxLKyVl7qb&vnnE?Io4S3Rd!E=HAdoj5ssDc3eM_tv2!YeLe-P5Fcg>#|k=n zQQxhyuD}}na@_}_Eg?nU7p5Hupi%Af)X4SY^OTt`AJC$TgJN0mNfDt!^H0d$+wrDBIIh-vq!}s^v>e@mt55B?i6OMuns7JJ>4rE z&4aEx9Azf#bZC1rzW#~hMWXw&#l74hvEGDLuoe_g|w$E5rCA`jyPFyIIx3}e$csy%e z=`CeEsunTNQICAAc#Fo{$X@LqD=MqJ6FQzF*KW}P|=*(uJ>fD zjdD9Shw*~l`bpF+KD#7xSftVgp^)%=SJIinHz_uy_Eo^4(~y~0Ymrk`r1-@j!V>(^ zN$84g@O2>cII$JTOk|^TRx4EOXRNQ;s-ylkT)W47zL}e1XnZo*^S(q~SN>lg=!c7W^nhw=tL$b2XNyVge3NFn17bzr%;X^8cT4O#Kt50N>D*_9#|1?ZsZ0Zysc)rn!To9;NX5?NeA zO@_OQD|ctZ4R-|h4Po~yn?OZ4ijg<$6IrLdlH{xxY8Ugz<|Zm z_x7q`iAncBw~)~B1pv}B9}0VlGx9Ot?B+*x4*l?v+ljMK z?cFMF(RvD0Av0V|su-Gj%jqe^7166TWJ?X}%F17W9yz^OBvH@u8+(o|q7=!6DU0fE z*~V8?P%imnrobr1R%Yqfz4utZW{NqKMMUX|#r^S|8Kw;?4N|cnJVLK-%uwP>w%wXq z&xz=b&S~5VQ^1s_Kqd4UOh#`&fsKDA|Ku(kP(X1Rv*6_`elbU6P+ZwCOe=YrT%;O? z<@-g*4}RDNBc6Ip(&&hN&{PwG@>z_aQc5tKRrgBjFHDv0w$6><#2!_jw+O+QaK8Pz z%e4NI8pLohB#zt0#iv*)!_s%1bS_+VUBfB%kcZS z=)af})Zx@umlee)$EkhvPo}q`Hg{t=;2U|fXQ%_COla(_Nh4GA7JhqVqp52QR>-UP zJ6!bsZ5Jh)xUb;sEbBLOnPoN+_*Z`=v#}y|5IWzD>>tZ3V?rU~s4QnN20?yS5y=ge zz<`dUoT5Xzsl{ge-cRyl9~CNu^r+0&4N`orvg-ue_iSnqnJF1}%_oSl)f>-=GY+Z5 zZ=Mpfri`@W-#x6nGcC38>P|2*!PH9*okS^W)+t_-(j#2s9=W)rCo$xnH|uvKZp=V- zE;lx>U%DP0Kfl0T{x3-Ps0cwFwv2UpW~oj4%N1RZsHHLJOGXzrcn9q*Yh(4WDzZCP9&{}E~qT1uMJD~|vM#7Vv1k%H1X}pL28CGpM`Yar$?pK>H z%oL$Ir-h<2OUg&dt02>7;0AQ6I)mY3o7V446BY%{utL#815|45@)kO(v-0J zv}JYx`a%Rk!)dn4GNfUb%$i9LNl13IMBWT+zz_Btd{(ZN-qet3r*8Pgt$2RXsf+%Z zg@~Z-&zEn_Nojf_bF}>CcJ8rg8l4ln_5oeG^!Ao;VIHn66j#C;<}>snv4Der1p=6I zfdK6@zD?mspbY0e*KyVe)lbg8Z`AQ$Y~+O-5{?7SRxV)sDeFFTki%jN$b^#4@c06O z5(Iz55+6A@CV#k4q+Wx2(v}0$m{zM{$gsfAyRQy(Rs*h!xsIdIU*69=kD}JY7vga+ zE$!1#L3mD{VIrLly58v)JUN<__R$z-e~PjF{a_Cn;)J5}miv0srT}vr7iSG77!in8 z_hJ}SsV2f9c$e_;Z1YnkV03?+oD4jJ&{`-GvaIXuAGN0nW5fHU-RAlno2iV|H@Vwh z-^a&U2$jZLr`{W$28!!P!*ichv__a;?$;%-3tBQ>3|5OvH10eOyXnTOee5o`>`3BB zyU0P6A6?syq398yeJy5l66us%u5Lec&(26GisBg}6>)PG)pLtI?jqQ?ZKj4f-CcNq z(#fZB;^!;*ti*EEo!Q#tRFU~dAS9Ff{oHd28p$0r;ciV-2Huczs6y4;q>?Cq9#So& zg|S~>2wrcNZ0KSt%jD@~Gt%U|*+7dYoAYxP0f7Evu_@{4egyb2_)3vRV>6~ymLK&k z*v14^%SVdlp2Np{%e{Gi?mv&b6XM-#!q)d8(opm-J5EJ*zr!C9zZ>>E-$aj8H}vlE ze3*6c%!$78nh?rHUqh#Dx?+iSr7^7M;BqF>2yFB2rBAb9A;5Mb=-4-u$IWvX@n^#Z<_FQe6SBITDM>O*CZnE*F^pT%Rb zMHjSnxAD7llsE1Mo$`PLkddeBPlQfpA#}UR!JoLI!}0m7;~!>}ns(8X$*1P|UU0c~ znQ5Y{L1#lix0~^aeTLsjnT1xZ2_};^h2jffEmo^O_-nA4m!vxbz*)l>+z<3K{D9b^TeaiqP$}XKeS~!-W05Wz!BkS41dqDFU^>DHmElochULd%6f5ZCDn`Ozyk zJsiMgSrQ)l$mrpti)upkaxBT$Vr^I2H+Z{jbJ3tR(Rx|K7rknd;%qj+JJR(EM)umg z!}{Ei8+QWhfLP;BV+Uoyc1?i`ge9NFH&)Y6-!wDj_mRY4g`U7}KdbMK?20~tore8^ zyuH{7X46BS!VO(*R{+Kin0v#17nAhcYGQsr5HUo<0&2?fi#qanq3p8qAU)PI9 zOKb&Ez%ID{UiUORl94?wK1aL`V8*nW;EA1z1&mo?#x@_vy`FI54;@TZnx z@>5k&6M7I*WLl@B;NW*Ssx1b8Ad+YZPfBxhzBS{Vl%doCR1}#nSFod7Q%6RvEjF7U zP(K!Iy`_5d7ZJ>V(R#o?@q!^p+zj&$4D_Eq@gEPQWI(tHoplxM4;bw483D<`9vJ|W z)+e*r+x1jT^&by{Z-GidN`sD<|NYC4KZ(qMlYxxMXowE`!!Z5hOFo~0*NgXj{_r1T z`a3l67(^9*tQZq^peFdMe)hkA10E=Ki;0Af_>VC?;j07#$bq#9*!jpA@SjnK-yc9n z2VPHxLHxfA?!|kc&hEwJH}e;Nz;d464gpW`|GM;8f_MeIv$%iE_>Z*}1$Y6{KTA4) z{+|cOiV5rttAbO-tN%Xlzt%Pq$aupi9zE4F{xLFOJq&1oo%z2T|39zo{|_6#;wLy% zn!P`3i_YX+dFle;CJ9k+bz^Oa0oFVsOCu*{O zc-J>tc?0U0WFun;Q+;wRC}06mit-}|!0`WwqUb3!K9?!YxRG=WW8+kRivPIbcz89d zFm+%GSC4cBk458N8tP}d3MvCNw zf0r}(sa)w_NsV;>3tFW8aFqS-ok7&;=5YTIQDYWRPA`}J$?!2Xd(!c6_d(?T((VK+ z!N)cjfQl~IZVrJyO-{Wmq+lfvA>^&qb|h5a^5s{XSAY2BfHH zXk^P_x8kw5Y2lX|pOh^~n-6xV9SXT(T}|r?OO|}ns1gElbuSUm%GBFKxWvF@CyTgz~Q77Wtqe9T@ROld&boJI>+`gZaEw(@68Zw$oBlfQhKEmFJ_1t3bf0ZuW4{y4?Q zjNO#E29r$E?Qu6=z2kloCg9rLa$3f4r%9DIgW}#4m|V{ zhhW7B_)KLO72_Z8SbxYxKDj94sw!eN>^GS;?2r9wXABe+{`gSsaoGG&LrO#eWY6c~ zuo;uTEYy+ZN`H$3if2vTR`H#7)@iwIZtHTIosYkpFIGqa?O}><9+4S;3$3X3<5i)3 z*E#o8G7T-3t`25$?TmUa!KdZ2HKN<@B`F#D=DfC5^n!5qk&sNTRSDyf&FclxZJ8Eq zAyKZNu0ru!3)9iMus8QT(XI&7o@edUV_8~=`D!h3bPDP2pQbayPUTb^jlQXolRday zzhG^MCm73~{|2NsHYlbuV#_Cd-kdJT7hEQB=!diCEh_B7!>Hs)Ca#wb-&1ONte`7r z+G5`ooE!~^0#RkKzmTz z>~;|#@3HuS@O#!&yAdH;?qy#J_uIv$l^Sf*@b{b#k8^@Gr2KsM@= z8YEpq4SF!6SgQ*8m#u&ll9EM#PO^GPNQ1{e%PerTq66fu zB4X&|#St)jtn?lhB3t-i^L#gdJcLEo?5iRAk~6DT3pySyFjntQ$$^WsMeYI3=0j;Z z-%>r0!22GVpg-BQ>pf!sII~)rN{Lo8U9VywN0 zT8L%hOUDew3*`_CqmkU)>IRZt%{Pme+~L+b627sP$&vb&abO{p%=I-u1nu3whu;hO z5x=`6(SC^- z1~S+SlA|O=+exwyLxkv9Sg}PG$7-gj|H^nYK#3bi)#x8f{#)U|Q9^nF{*K4?nBg6Y zA)h;~S5zf34zmX4t^Cvd-4PEkS9H`K&f5N#gx|mY_+}p#GC{<2mTeQx=%>Sd4i6Mo z3I$^^$6aITBK`ZKZ~%nouNhz2{_hp~2ld0Mh&PkY>eQM!^KZrRr?DqE^JpgTHKZUt zfmNO+20XxV0hXY@dPHUO|M+g*4}ck3wrk6o@OzNI&Ev<>0sI{Rn}j9b(8ZJdJV_1k z@R1688U(Y}B9sDsV3RFixu^==u8TlE?(dGwbsya;)F@QTkpAz{j?qEBvF{~JRQ+D@LCoI1N~779qgaW9VJDVW zTA`glcT|q^Ulm^j|L(nEG{J`Qa4fxYN76Hb-*rIwLZs?7=25iKJ)r(u;*Uu8G|`U3 z`R`LfPphAW3#oE1a}}*ur;7TIFi?XEN_YJ$LH0#>A)-Xi+{;V&%Kv`{#p6;q5&P$1nEp;q!pH{MhBhMhSZKwoV3A z$^U&hdfO-M>CDnxoi7Z*zs0AG0P=r-b@o#InK=TKvFtyxxZ(%V5`DZVDdqx|mHOwq z{}gZl6+r{YnH(k;;=d(wKA}?SZ#z%M4paH%@~ksL7iDU~{~5vglLLf1MzAv&i-Lzo4NH)W zF(~oLo`!yO4AZS&z54h6Qjqu*saA>u zefxW0J~3n~che$3m*PkG_S2sx)JL?IPvbOzy?L~8Z11iem}$qmRsqC?DUoui8iu|9 z*mnu_k+2n*e|A1H;8C0ccmoxm_EZ!P>e0N$+Ku~3zt3Yx3yfh@76~T`(_CtD zT5be2gO}svvL*27zkRE)+;^n~Xj|T#%=o!3XsOhRk`3;j*J{VQU!C?YHQ(&fPP-*m z7_Rpp^0NtLRQBb*^E2o7%Vv(3ezQNdWj-%oi1ov?l{OZ zTdyARf( znC%jDrhglQqSBAHv2=51!;^yk{JE`%M8vd3W=n7-92(RSM!*G4@1Tpt1V{A{Y10}p z4OZ6j>divib^=3e&y@$BH58_?o6V>VrSeg0HY}QAA|i2o^LTXHCQ2vYjnC;j4+FTp zTT7_7FI$Fj(mWV}@(4jrt2y#v{yQfrprNv3z1NB#t3{vvreOn;BkgkSZ!Q2BEC!Ez zfa>BAChqEV$gFv=#ROhF^`6?FoYtPQ|6=j7C}PERsz8jlSa>>o(IL@^=0nP_;0LxL zyW2DyeJat(6AsXov9`4q)nj?aRHNIbmRzfmz9I5^!a8C8Tj#oq?hl7vMGjlbZs`+j z+e#%WjA&?FYov+=8nUIc&2ABATVv|i7fm0Q-1jZQ(k5olVAf-&wPT>Vr(O=G>&kIS2 zwBs^Q`-dVYPP|QkA^CgEnJ}P_&&HLDXn!kY08%cQ`dTrTa4?B$6diDE#bOa(@?X9K z>t;jMS4w>Fc(|V@nPi1S2+Ed-`@*&{m>-?HzE-!};25EaCBe+fyzGAUjqcN4iw6mx zWlURx$$CU<&u(dxDGKb_5{D(I808-PHHFt{$N@UQ%T+((q=1?T$j?lfq(BC%z4J1}pbkkUq z0&Qj3JW$EXQhB`Yo0^Pcm_NGcjQ9;{&Q`b-ZfF-w?6clxf}s&InECI->EcT0yxi!N z%HfsfbHwpd6f9aQJ!*oSWkt8K%?xNz;8Y2dy~|duFa&`KzMj` z2jU{D_Iq9uG!p?>IVYB70dw0QY zlq+0W^L*@0`8T zH0MvM>$M2Bu@Yf|tLwnzINqsFgrYa_8ryHaj;_j4%)xRGiVhs zsar$(^{W~fU|*feNAyMp9!&2_e+yN7%Lg71SeHiOPhIiqSqaqKj`TWMsKmCi~IDJ@P)=QZXeEe{#fauZSg$ZGi4A5ki(pl8nvfB2!G$aZ!(uTeo?Z3m{E zd(7M%Zr#LFiJ%x<4AdvJh>eR-_1M7k1>n@#@}0~GHwfr1P)yrs6Mz$N*`_7SRss5R zZMZnXP!<0PeJ{^&NiUmq>*Hm#IB?pi&nq0mTBiDq<`07+Msf+_uPYO$}cbcYC z>9HHCnE;J?Ox59tQtdN~(%rWL1VMcEzkZ5Uvosj%v^wW66JLll-C-9eZHsG`ww@Q5 ztF+4AU&kARMgxhb*~{R3Rh0RleNz7`CO^q?aA(GFZ<&aAnW)tGkhtK*#Jj)s4GZZ? z3A8T<>gPCtV6w&LcR7vtl^suKg#r}{%KFNu5JTz8_x5s8)FaJMX)TyDj#ot!!BR31 znP_V5^~G$r_G7&8Q=Cs4>FFku0wwZ7aT(z?P|`Z>UfHKWT46SAi8piTd379rXkTw% zCvwL_H$~pNd=8WiPh(Y~`WdgD)K^&FlOx$L^w4mlHf}D~2ISx1oWR$FiIR9Qs;zD7 zqGGA1X64D; zet~x-h*7)Ytz~br^1;ElaB#9>ca6p3LGbKntO-))yBkH*(Tc?9ry{%?14;B{Q3a}n z0U8a?Vr!UWi(dCv6=KsG`oy}bE|T7;F9?`CWY9Yp!~V)dt-fue^U1pvd*J(sn0~e# zXb$enw{%tnTSiBa8bQ2KyBWG6`8cv-a1TeRYX$US836IdDGb zBW<33-^A`LOEt>HQ{8V(rNGNvT1p_l=B@9 zwthg@voB@0*Oa@(Yc&+P12(|~4}9EvIB1V$Z?Yt2hE&T}63rz1_C47q!OA-1P9@#d zE&x;aNmcmCn2VJXd|JBW$|Zpl7|Fes!{-omLwte1hC1J6l=En^X#g zN&r{x>S(^aO`ll4L$}RgUERzwc1By&3Itg*1NrZ7g%vN5>JQ0)IQ-vvMASDvG1r#J zEk<}Z2o2tbd-sOBPqw~X5QNfm2}EBNz6?|qd>6Sq`?}0;*jz3hF*2A0%+f5#pV^9h z-b`|685woY7Z?uNTc20P8j{2QC5`l1r#$Kz>5k$O-uUglQJ|tjg+gMY7Y)Sxc1lQ#QFT&$ zs>U$zGKqs26)FtiRckGqDV$QWLxqBh|59fU(3gvQX|gnOz$Y*j5~+WS=;I&*C;;jl zp>}~MbDIDXC~&gk>$-lj^q%4Zj}L;8kI@2j+~|LI@J|=^andj{uDh?Mo}5xr8mL!X zdKBco0xrJ3T|fU(DX8R6-uBV7Odp%^mnU6AE4t!pA8HUjkm5^OLG#-qgS3J^`WAX< zB;NO!C-^U}y!HMZM{g9R1VUWT_ooePLThtkZAqr3l?KeF7~>)Ir|^qhq#zJMfA`^o z1mMWZl__AqfAUa2LM(UwuVDa;I#*DEK_7(zR5ptf_HVZ~4kAY2e+^4u)VW^;FR*xV z+ipZMs8+JD*dv@8+x$J()ejJmX~*9}4z=82*BTR|T>pyxIlnv)Htu~|4T{JL=(Fy; zF9MO=tZ&I#k5yIr#*9t>dBHn~LJzVPHog0iefo1Ze%ABCHvE6LORx{E4P}I0c>qI{ z|Fb0b7Q>e(p_zm~DpxLWf~5a5Q1!=i1|_hKCReO~o#DUl#6SNQbpT>**{(Qczt`=L zUws@Js@h{LE{@sm=|=tYw@G~zsDao2ANKjkukZ_6*POk{hjF0`?`p#SS}RhbN6U!F z9=-4>D+Nq1k{=#OwY}$Am=!_597y9QXjA+9I2hmp^EONK}{``B#eQ4(=l!YM9F~ zVX`D-g@XNad`V~Ay0!GJzKEr+_nu0pu8WQ1N==xscg?sRBGA#1`od(vkGUN-?^oQU z9WrdXf-67=OU)KnFG?5P4rtlzmh2pvH*4AKc$_MJxRI!}PxUEx+nR&8sXfi^o}ce` z^bc1mS*Tri5MZ*GLHb?K*5jmXFQ?fI(K@P>3ZrY6e~NQf877{}-W8H1nk;gWCvfj* z%ez2Y&X<~30{k?k`5Fti%N;e%k}xy+3aOX$xPw>IMvc@BO+BY!Adin9!&{YbFK|N0 zyUD!;-#m6{6pnrKPxE$CL+F#!)BS7pQe+R!`^=D{<+hhax|vYg-Azpz{SMlrSyKl_ z_uhUyO7(>ScN#YPlv5QBAtNoL!~{H~{x}t^Y>d;3>qf@LS85mhzpQ!7B&csJ`@r|j zZ|qWp3UWWfI#$)()P@%%0r8RwzXn1bJUl$veUiPb+xXAt_uF;`rFqZ7g@_fKr@E@K zW^;g={Q^YCd(@vH8@B+Vw`ja`ymG85!+d2$#cj3R3RGr(6!|=x;?8?MR6){msM#-! z;bVU{vIKe`uET|1pCPK(*w^n*>k07gZ1GQ(c&NL9>bcAZHnk^R{V^$ehKKDSqZ+QX zi=~n|=k26h2yN$=RfkevO&dw2?N2u!pYUc_lr!{{XJt{uF+{l*x%#qOj?J{(8Oh}l zpYKjr`UYgTcaQX8ssx#j`vxE_aHMD#Cv}b;aM*mwZu<01g=nUpYF`!hW3oIjAUL0f ztJbKX=yk^5O<&6lPqF9gbfX{o2K&o1Be?rcG$KWW8>w+-FFNy>Jy5EAN*L#B%=Bju@KvjAt3A#;Uq5!;%$q3~Vy@Dk z=|H=?QQ4hh+%KM9IyD13Okb&PrWt8R5LvZfp%5-CpX`DXp|XUMi4Ot}d&_S!G^H%S z^^KOkS!p=c*{(N0gPExS6oOUH9M0>()%_CC`R>#_e~IKB0~XePh3T@C)8@HUq~ccf zn|M&a(&heWTrROkcdBzJ$8u@n{!Gh{je%5SeSrra1(C8cuRuq~1+nfKYLf7>m)Ga7 z63yr8dRvmunmxE&)XI___Ln-5x3^1XyTWeIDJt6!uE&Q*nz&Cr2zJ2lsGSCP!9kv$ zEy?VI8;!gCtVE-wwZm3FsWb0}->+9+-c8_@EDd__`Fh(cg6Ln*S+$|vZ>kGWe+sc` zO;xeH!vBRon+kM{I>m^lN4Z;?tu~8)AUdwU@u;-et?7|3d%0N8>9k+hiDW#%EpYQw z!f^=3cxK(ImHyN&XW7c1HzZ{o=RN^Q+}|v7w;wi99)QP;e+Ymrsy>hv0f(p#kp-IJ z;3uZ{KW)VGsnS;|5ehI57|z;XEDq3bi%BJQ8w4U=yl{ZbU>5NJ1-c{Iw)-3@lxnpg z+NK;Rdz`iB8u|*Taqr!&dd^SlchB;cdZ8vgyl=-pN4Q?ukGMs;zbBe&AeA&-Y7kSJ zO=8}-tK~oA$}3(ZNc*sNrr->ZcOG`JZrumo4?PIgM2aAnW@k!#J4-UDYNXS zKhyYQe=Q^Js;$=r=No8PnNNPmsXuICa6c$Qpw?;WvTYe6)ooREs%=s7(~S#Gs%h9_ zX8#8dp1FYLn{o~YnzHdBo?cinR#w(K{#hpLECN25Zll43H7H$SVbEqnvK}RjET(}< zmGzhA^BuRG?4q{|0^_SNufRBqJOo%tmivBj)Q4u(Y<<#Wq>u+mdw zVXeM!JEo@_^oMgAPNxDRFtx}wJ_wYvsh$_TErSkCJ4J$$m3K)2;>r=lD?D&YpKz01 zbKd0?-1^?l*O7j+%<&`GNh}GRAjg@Q-*c7OZb-P?$2`ohF&VWLxS4=S!<1a8acD%D z*xR&BjV<75O>gR%rbLXt_Kt^w7RPVNYjEPeOdlzMwLdWFk1Mhkp7%77hsjphbg{#} zbY*h1z@exQvu`Jdkukvb^w?=YoYU-wC2~*SR?*ZR&mBDSc?rWwF7LwB&OaK*wN(ds zKNnl*OT&k}uXJE71*212t^J zABfNr1AQx#P9;w9JzkwiJT%G+5U7!)0x=;R9u$jl2cmmkX#osC%8%AmL?C>A!i_wm zc{LQ{`;TmkVar$r2`@+$`#x_NotBbyE+tZAPC&iPMBy+0rJ2p=I?*V^#ountvCXmhQV9cnk`pkg*o-`uZBX&5z5O!jRa-h*7>TNDh$SnJ&&3PR(R1Yj15dnXdIQ zpFAgNc@WFwl( zl8oTnoP$(EvF0k@B>_5N3?ZxKnjbf?)BZz3&1_n(8ytrvcl^ZH`I@#MRC!YK#iVn^ z)Uh1FE%kZQSfT3?yDg*`jPQgy)f;=r1d-?GFKQxeghok8 zV-gcV&t4*F-Z>1uqtZkHLb%1+#l)h2&prsX9Vw(rl=01-1P2GZmlE2^v(u(m z>fZ4lEJ(Ob!MOOhhTpE+wWdvNCn+3N1@0barP}i$?-~ICn<{1}u+o_0Ii1{yqL>j8 z&%V`KOVSE>nrFXq%)@jh1 zH~sR}jMkFb`e9Q#myF9z1u`V`FM2@;nwFOJ@P9;+mb}dNiq4Czkbr8jh}If45i?Tu zFpBgtbH4!+xfPztM1{wzzkj${%kqetb@&L3pM>donx(7SfTt`s+fc56O2wM5EMx#- zvc*esqs0`j;o@^B`#lw|KaK%~sK8syd29qJc5wjdEA1R#mZb1&@^OBtt-$3xrVyMv>(C*6KSc8U+Od|w zi{qPsX+`?D#`Nd_W>e8v^T`dNqYv|wX0*7-xnS|H+Dt`<%Tn zm~;~OrN| zYksYCl?DF&>l;wNcqtdm5Axko9#dRBXSor!t!(U;mI{zb|M&<4lIg?^DuxcAopn18J{x;(^G2aE_6{C`I!Wa*E-4Vf=QZ5 zNhe+Zvd18OUUBJBm8t$LG56|xM?A-#S@lgKoUg!Kj?RT6*sdk5@qk^DjrINg{mJ~I zOw|$Q6&r?&;nYW5`k^kuq_C6u3zPe922X22@| ztu%dDqJ<`&o8t1l85BUUUAl#p9psiNlM32z?Sn~8s!Qch(%`017iPF{;EU^c#>>OW zMn4{3xcYJDYu1(1HOiYY*lPRC(kz0(qpW0mm<0RrTw+1-(&o!Jlrj_5Q;2-yel;9a znMl#+Nz(6qFeqckm(G#LkQSJ5%7Qp)WinG)FuLwo&?+_rdmWnNv4^x3EIBDVDcy=t zT4j-z>l__SQ3`V`6O^6PKl)(7a88^W$C@c4c_E}Js9~?R2Nhc`KjW=M7@j-Nqq*Uq z$-9lAI~lKuJ4ljM!$*T2Z8%Cc0A=Uq(Z{iTjFBMX4C=JIxTf`Wc zU=u%8Gc}2SgNcgZiwz>$rEBQkIdZf<&K8%yY>7RnbJMSrDD!eAj*ZRTm2cjh*!}P$ zpWVFV=N+PJD$Es3=}ooCL7;(utFoYsfLntRhrMaA6cXo6n*YjRLv{9aitEDGCZ}i; z!3Uo*DFSgf;e2qsYoiH`28)m@`?Q`S2zL6jD>jTBcRn(sdbRZrWhdy#*;9@pK1HiZ zt~aJkFOhcTQJb1DZ?zC=XlWM4V4{4}%*y&v7~W$gEr?FK3yU&s}9vB_S!l1zJX7t(#OxW3(Q^=MwH(dB@jh2+t`;Gwqt1_N`@& zhl8W_%{&~&&hpvr40@UuT})__T~)WfPT+eg5QsJNu_s=53*Fr!6>rdt{%lV&GYU?Q zQ5NG`ZYbxwI$QJ5k%Xj~4WK?3k4mG0RbG)eZSN^=Qix3pH(d*G?0x+vFY|nNQKm&C zjNlqVxFuPFu4nhmPWt#`1^<0ayQi%ck{{q?##p@<6AIG9k=dODmyKl2+AXlKR2RC^ zn6j>%j6I(g{%9w&t)^g=O|3iHhcF2YTTO*ExpKpZ`QY$ftGdvd<_*JI&LrG_D$U|a+pgPAvCFw$_PfI^YoR`qkzA)?AHb3;M)kerWOoNI0;Hd9x2 zRayff9O;d(yWcs2PE5S-9fxr^lri8WZdnOpIgj2lCOv!*t8vwvg_thEC#O(HI)-mm zotE$KYW(7gvg?qS2CZ@)lS@^sh;aCY+^^vF_ErFNc0U?c>$*$>k$uY{1SYUV*M`Bx z#Fdvri3Bg-j|ixW9~SZm?90o_H7kZgS=l+ky2Ulr#P|ek=50pH>EO*s+w;{tPh%Mw zYP~eSasP=+f}58Y%DU_K!ac`up3z-CQGX#>G(H$g{%bc3P6na0Z8Yibrjpon?^kQe zeWYPD?FYBM_^b+V_$iSuZ9D}cMr<&VWNo#Xcvns$hB0X@@Zq8c)fj1BwTU0{5kjE3!u^IK z*2S?x+ObtE&Y6gX5(a*dgf%yV@;62@%UnHn)tv8wYY2Nc*s$LvRoFFS3;S$jYZ$)- z>gq2tbxAaiS}vYZsE^?h*2%MTY?U9*rPrQOF;rc9zqxOBdQfeK7#wLN`IK`Qr=A{@|?o*{!y5+gg#OpbWC7Gq&zcgZvG|YNY%To3q zf^scD4WO_XWzcdWgW-7Fv@n$NvXf>ex*8BA2mF87h z_%F_0n%0R$3f^YJYM?qhT(?(4*OoPSkz0Yeqd`Bo#S9z!i$_}4&FJxo!f(`3=C2!Q zzd5p)JSu$b=0BXnp7U^S(MmaPeax zGqZff^kbN8iOeIvD^Ajq4QIX?JAF7W#RMwmM-^kKv63_K%DsIYrEd zl~z-1!dgmRl0R$!EP(qg-7?A&kHrV9j(_&lo1oXRC%D_@xz6O0m^6;(epK7To$I~2Sbg#Lt919chg`#yufM5UTP7>jzNL^LA|1SfeIfV}{n0MegM*faFPs9>> z1{ijC!u#|q|9nRI2JcjY_U3JxlNCA{o$P6#MXOF(^>hDx_UgV5o-&U+!?7s-HH>6m zphW@8YniY9jOqB(*DHX{Pa__D|KuenOVAAlTBH*=uW|36&z_uP!J~S^1fa>s4V^5F z6${Yf|1R6{)&N%O|Ie}|kodBpU_-HUwdOgnXN6}f-enHL+789ws|*YGjVj#gRG47J zpC$WxB{VwPij|eM!1Vq3#{P(nbYk;4nwq8d4a1^TP|nh;VD@OU@WIWQDVq<}jfV?N z8HOkB^W^80@v%KY=)1IJ`v$A3d?li1M$6E;XHKyI!-Un-&$_hmd8@=~qsfF~XXEx< zbqq?BOYn?uAboJr8Hqf7T2VG8t2^+Vb_dE3;IzDiQhq{bPWPMVgbf{PNzQfX!HM zd|-Q8Zkju4-Al|{Xm_eJBX_)VSOSKt%o(a4iMxcMMO2%3S<;gpPR}(}7P0p)RcMYk zVHY>M_iP!bVU8%?(`!UD~{t z&xg;7V4m3Yl)mHDslH~|rxfkUub+3T#Kiv$O4@n2(8S-1Z-x`&JtSQEY)>DdQp{WY z4sI$*P45;hp&iVx z{&I%}(;8^?)cz9ewTW%NpOyEXT8H>-U>Jy9b$oOQzP(t49{urJ$O)%TFM_^+8M@GH$4LD<2+?NZEJz;Vm_+sppJ;%vpZYSQ+S5E)Nn*Rj2{&H_oy zHznJp?G7&k^)VZJxap+0&4ag>?10?9LYesIgL6r{Z!ZbAxT8hygv+>*&J0;<2l!A!tcw%jCeKazypcFV@VxdRoV1e}9&uPty>2x&+vx_Wn* zJI5?5Qlf)pzibTJ);l(tQCM0#@p;2*=N3gSVM)CtbA@F1x2P!@j9JO+;jNvhcyb7= z%$=(~KJ?7t(l;L*7H6GXj#Qj)IOX=#{^%sqVT9NpGLZlrU(gYgcXWcq1BPA=R8%S~ zQy*Gy6axMP;?s~5uNXTbm}zi4fE+r&oVMc_KnCsDW#~c_76K41uU=`fo?TTJ6`!)T za%UZgZgFR3LhnLUe6WU&%|LcZUzX;Mf?kNY%y6~aMoxNqC%G6_nf4kT9SJ_CWkgt*Nh6cwV1M4Yv6w53u9NVKrEddqC2SZ0 z;-HY-l1!{2Kml|e)9GKGPr}-S{(8;MyxOrUNNcLbcWnb?)KtX60Byg9tl+O%gW5@3+6JK zB{PU?jl5n4#1zGLdc&6NxAyzy_)T`t?pL-(P4V8?~KSD+e`$V7Fu6p!+a^L0b&vcfL?|5v08O&h+WTj=~@-9cWWcYgJreAuy z)$SJmQkM+V7RGy{@3z?7=4CXlO1hOHOD_6G4fYB5_3H&r9T%UllS#C>qi%N-exg#Y z_!Wt4UE2Sl)}aR;XfO#fJOC_*OIFArF zx{HA_GiZbX zzSit*&`$bxCmjln_(nAh#FC?Tr9NNLq4GGRTaab+P4)amPw}oQ}#f;tB?Rz)%l>& zW9_>KA{lM-Sfl>b78W87*_h)cA|tHCEyA#DsLv|hYJ=mKb_3q+m?!VIIM+yoQz;V% z2zu={6Ad8D(~KlterWZ>J8q^{*jsaF47_1`q1lmOr3T*`qM@aQ%@V89O}MkZOq1D1 zY0pFW2D&E2DM)j;?Jqy=vN6r%fy?Y`&{bD1wa8;159mW9pkA}D=dKiSZs6joR#4_7 zb|rSBEhtN_!Zao*TX9D$6q`j+>nxgJVi`*#a6>gT?)efYeDG$FS8tCcI$OjW3c^KR z*I87c5Gf8^y_rH5QBoq==>@y9AZDahv;TBnCUs2WoeKr+S@h%~X5y<(Wqf{K-IjXs z#gTWIImq{0?!i!8~tSacx)Nx-t#O zpFJ|$sllKt>8`04^W^bt`R7pxP z!Yhb%vsVB6dWGrFo>+kzzo@xkY=;V2J!vJsDut=p%o|SV>gW*?+B6wHAFeP-vpR2o z{&2Z7t^)^;y8Y$Fz@s5LP~SFR!;{PliF=U(_G7!GNr&rtx}Rxy*`M&!d?4A*W0_To zq8p9g5c_I$>AP*y$?zbo88RjQLK#$<@_w;}BNoD>k;GQOQ7 zw-fx3NWiLla5Fr80$F`|Zrk{qKgIZ}2kw)@dWCnlF_}zvPemyNv2t3RITecubt4oH%!(y5r0b{ zgVhdqDaD}5bv7j@pp`&61`(bVcI++mcM z=3Ku`W|PGgVWHM_b7m&3!dYOb_(NQ~pxYBvoOenEbj9TYo6X1mQ-m(-iZN+(jl%ms zI*Qa(41$twmsz4!aDa}x@BC`>`Fy<$)@^eI(b|3ovl&EgB7P{~#I#M8;p5qh`_$K} zb*WCgpX5lN**7W)sh^WYTw+!Ss=JNfy3_I+RAztX`D~cy8Ck_{dtdAa*Wz0>V;ONF zLnnI9^2ls;4L$ZO!|+4*>VNhKFXBi3umAb~+W%|{o-zL8wRS52rg@K}qazQuSkEo~ zZ9VPE5@ytIr&gBP_veQv2n}`HmoHyx>g#`!pw3GYWW`gLp^xl2!a;n+L5>@aykcUA zeVO8?^!t}a)m{l6j-WTr2RHvABGzmNwGNnKO+dMM9gpZRwGz-Rx*1{a473Wvi4U9{ z4fghqizwemEe#f2ZL?N%KExnop}y7CcUqhH5Vf%#lcptXdvRf`N)<;_6v}rBg}uBT z#%D)z*d1evQ+uZ##DCj;#KtNESVDgR`RA`Ctjy7&?{~x#_^O>flyYjK`6BK3=3QGh zRzp10SD4x{sBldgd_lryUG7NT6J0X`AN?E9=+8FJV-(>$M?#v{GS_fK-FZ%uBV>W= ze5XaB1WfKVEm-{5Z20lsA*Sj~8gtvU#jij5Pv3_Go6*@F=BhtI%1YHDtbe@A6F`8N z8nCgI?MS5m2_u}p0bm7xHUF2;TvG=+dAEY|>_4Fn*=hjK_-mXj$|+!2DP(CDVV57K zDAKhGi;A+SsHo6jY9v2y5$uLPG?Qb>csON5Ecs=`BhOn=k+BhG6yn=WOljU<=AAjrNw#t04N%^SmE(unFfhcz?nju zZAnZw-NLM&IqvGK2SDEi9xnMGovY4<6X^JHa}g1dNLaiDkR}y!YtCboY(RUpwq~hB zYb@W*X)bDrrWIykXs3KYfH-%hGhrNSvIKW!1G&fuiB>)T`7ZZzf*vTH?M3XnWd2e z#q!8Fw@G5y?PYqK&II510U6|GA8}Kz^wzi_A8;Vy_2I46gnJd=b814Od02TaV z-X3o~-VtrvAmP@bD7n$wX?OMe*)oSP_}Q9)FDPNdsn*?&uzju~!$#|mmxzzXxtI#9 zb>&d*C+g!7fQ?%tdA-|XT`*-OfCHQIg@GLDmR_?xTqI#!1aQdc1s#hiruCn8?Vv|m zm3F3Z-dkR&KHY$~bF&Q<1^^xucb?@@EMd_D@G_OF5A+-yUXRaZ6&S&4`?Gayl4QIl z2W(*d?=Fd#j<}EPcfet@yw*Sf3nt)D*cL55Aj;U;wc@d6FEssm1AFrg&Gpjn@91^n zuet}W-`@YSfz9U!CHVogATP0rI>k~);7%&)yyGQm1mMTMf-^JEjty)fUnKNd(eg+| zk2khQFUJ|FIb)9Qyh@=R3FF%x*x%Eme>!bkg>Io=YKoIV`0bD`Hzft<`}&_5?B~^6 z7KZP}O5#)o5VOMjTl?7#Vc*g52foYRbYy&mTx^-{m#*Ixe}D?(ZkOxKQLh>W&=nNHBQX&g$~Ld2SH6Mqmp6p}Fgb#_)xOlsO}GBNtyv zvZ(7)Zx>mK*`@1daWPI*OUxMmr0KO8=ob*vqTpliI1#UH1-I8qUlb0UB&9FsjR!*hO z>M!u?wM%#5_JN9wcN<4M_d=vw;g~k&yR8L0ma@^^v@>TZV0@21j0-*O?6Tv~DNu;L z)dtE&7XhHuvfrDIt%g>n^3}`Z(hKvWzKPNl;$I_q4eaQJY&RR%$qt0yc$H>*u{Cdb z{Y@^K=sn;$ZYRAPmce}JmiB3^OOp{(kw3Qd@_hpQynq{Z&>Kc$c10F##1Rrs-+3fo zSkJZJ#VkN1DRL-`K1$k4%`e+S_mQ3J`<0K42>f7uXSR7x*D#)jKP{)**Xe+oTNd->(`; z?i8sh`r}8Yk+clyX|GVuX4d!%T}j`!`|{xHsTA4<(5aGI+RiF_`04{kKy(C&M}`tnZe93k+Ljk(TUagw*%za~3FAb@q9t$+|he-pmCyt-eGh;v`L=`l5` z?G?qfTU-Uc8{n)5y&v`yM6oqw1-Q(ioT1O7#7+A?jXxuqCwx8|R$0O|LM$4^P6UA#6pGjuTiTaD}ZYFWlyJLT4MbU#Ed zu)STJMWUFYBEkKW>+H+gYCk`GG2|!yM;ki_bW01j~ajkK4q>ugN8) zd2Q7g?GL40ZC&c1KNQ+8AU_t$`uL|2n|kdBCi>5PgXHlYWs@V31}D4)OG%wZRJ+MX zJD2RR-MG<#cBqe^X^k8(FYRl|y5CA8k)I^f5q79ZWu1MWLxs|Ici6MMAK}OHxV0RG zGkira=bJPJC!c}7D1WjOB>K2n?#L_wV+k(e(@5$$9tr5tar3)9)w{s){ZQa^#7QXR zNM8f8{F5EfYTl*O$6|7%+7C>?fQk%lAlx{(AOR31|{)+kENo_Z)Se7|=VM zU1!zt|M~1yJkaF!&>yWQX1;C|M`cNpd1!e0>_6(a;OG2>kG22XmLjGa z1Z-ZNAsu3#_n(gjkZ|Rc7aOuDI{Z2C&ey_e_l{La0vI?eW~QgF<=+cBwuysIEIT4R z*0E3LkJANw@xOihrm3!O>gMJ~%oB0kHj66wz+^q_3W3jgd&SpB6A)_b8!P6b`yk*P z!NYv9Z-?grB@xGVQpXc7wy+OZfhiSP2@g8nc{GEyUV#ziP!xS{*{2Z(wgAO^$-{3g zdEvVg`AayVhO#=0y=cNdu!nYWz;)!-gVIcoMF+?Uz)2?A;e4}~9<~0Muz&ozFHPX! zp>K^GSP9epq9bM_@Uc;|{m#;~mM`6jnfm%dClu_u^c3nl9TNIlI_=u=+*IO0(GT4( zDF1VLz$~>u9PcMme&w&FJ{GWU5HM{u#OePO1{NSX3i#?MR@dV`|FMi&7JwE%j1j2+ z`P0d#S9u50u=_#mNE%MM4g?gR0WHee<8z((B7Yu!)dFPC)2o^Bp97Gs1X@fMK1Ka+ zlKvlTz`urg&A_MT$CLTvS^#*;-7p`LKQBD_^b4^3sf>&qft0KN9O;Maz@jfGfI)w@ z*6}Cb?E@Gx??%N5iQvzx#U+d$OFzr=l$0et9xm+&U>{C70cHPNsdZ~i{FJ{@eBfAYIk92XE{frt_Pxhb zF)@C3SG~3)M=^Ec3-0c{^hc7i_^OTqnwfc({j1f9I8EZC7H1IC3*!NKg+O>^F#TN8 zGJ6>|`^Rof^$v6&GadAbtnAdOnp(K_JhT8c_>N9wm`&-^6F|MW9M~b!Arh2o5kMA*&4EH9LkbUm zlfJ&?#yC_2XB@KeIf(bAR*PIb7V-20JF+B0&hS|K72*R{U9nAGb$z=e=8D*U_8U)e zLh?Jq@ZgL5BNT|rsXS=0RT2A;h9no+Y%zdqkKrbq;dZmw453-rdzVo%Ns`#9#;X!* ztv~`5TDlJKK$-)**Wwjwt$~<-I-51av3qL8o9{5iX0)HZ#IrwK?r>G|E%M8o!0kyr5*p6! zB(MTdLLc4%u{f!=-mey3>lQGhIcxYGXxm=xT_#>1x&M=!jZI$$}q4Z?~T%qvl6?#@weE z4?>(}?kMC?lnK1FHgK!#!kO470|s76bQeThryQag9ib715hBTB5Y=6CIijpL)<%397hQP9(xeD0#baC9zn{Y~}_3URZ^Y{g}Sps73on zYTbCG`yvN5l>kvr&t1Ujg&zE_EqEFnt&<|%eh{-%E@Cn10}^&1xMeH$I5RjlJ{}Qm z5&t;0ioq1VGrB(*O4?q;uPDMk7^`)x&l83LDo~ctCBma!3Kk_<;<{d1V$)w~C^ka~ zz0zUjNAI5UY*ayV^StPECsV1>o7qyEf$K1V$BxsNHEQE;Sp|*HB|UD~qvRtyjcMkU z+^TsW=dtK!GnAQPMVZIaB}QDmYnp|Qf#49FoQ zpI))$Yew;ec(aN)sTfyFAcEJw^=iyT9e#BwRJsc0e#WYu1tqFmA4!Y3Dghxo^~b-wZEY5w9HbF$qRrxVt}YkDD-R zZ3~Y$J`cfE_?O zuBKmq_4{~F1Ra(-A_`#GoEI=m0zbEeH|*N+6`+9R+yln8h2EzIw*r9o*h)Thg$ zKq5@88DNZsGq^rH_RI)~(=inRUR@jPQWI2%ClMHgzsuAA-@wj*+3xw_I2PMXdyGy4 zjXrmJcue0QAepx^stR&p{Z}{UZ-5$%586z3WfF&hpB_0p3Fl9IquvXDoR_~0YVciP ziCM#8Uyi5gkAVArJy_!Zm-zSMXE1OqnVH|;8qaiDMrZ>NOy)P`SZr2=!+DqXj*hZm z!e{xjyfHi7{QUe1MjrCkBgyZ9ROB#0OXcx+$7xgoQg&J4(A-HV1r@FrBKK=c`B{&N z;b0aG0!Jq&$XxrEhgm(D3i38X){=T~JO7CA@E7Q|mX^ZP^n#C7q`6 z-}_zmg-T5@Dj2YmR>@mS$s{=oV%S8c3N9_-{)dCYiEdg6~W0FlEf|B&(J!YvY#NgSE z+gNdO`B$3j+O|RAN?c zRWA_Jf&BWy>cHC6+V;Z}5(3I>w6okMuYMXF(| z%Ns+D@rWoR&kMtbAU#X?Go$&wIX-T#VKaL;u9o*Bya`>-302ziQ zuBYz)4(Zg5T?XS>u37?BHjX5b>zwv&`^51~AEoEk;{^uukgR^*yREv)9ns6VTOsab z_kl3UsQGF&&KCf$_=lyfA(9}!#lF_O60G$H6|uQ0r$)tt9Bk4TRDIXp6pKGVFUlR5 zeR>t15vvXnKaiIpmotD^La*$^u3ld0m<)qql8H$A3X$nRtlH$G<9*>9L(JpU;1Eug z0Hh3*B|aK<9T^Aol~-QGo>+0ww>OcyK>Ae5WdWdkH_39=%Uuv^k20PnN4MPMmTg~4 zA9}~aF|;z`_cSXKO-&^BTly=Rq$e{nu?<^f zM*SjyBQlh5;mmNpt0eh&aie1rkm;;&mwrI{9?GEPNl9Cr;J`tC^D3gmgBCgEE~&6> z{=rMQ;#q8fORU2RXRSpKkOHi05*64@Jz)6LXxwwgKn#QJ$;Pd?*%y0{ecBOn|Edia zUq?vU$qv@%x_)psrKYAC>Sk@V?S8T9IdKmY=Ynq59Pe8$p&ch-9Ih|vtD6cgy>z`C zjT-!wh+>a(&t5LFzu+;mBP@;d1C^}aT=}RZ!SFa|l&?EFGxmf5|C3q?Tk4(8>!y6r_Q@F2JgH3LA3W9^1s@Nlb1?+xReFF zquWiGUW#^tWZJ~92S{elY3-W?uUAl9?Cl5^t;}n4niF$3welziXb`Pgfx^x0&85_> zn!|5bEi{Im6x^65-;Q}WS)!L+l$@0vzfO0T6S>L; zH8THbrZF&SM`!q3N-{44+!Jf#0`8-uqgTP#z>X<=W`#{lc(7jiMq9{dm?y}Zgp`!G zT38w30=e@QTah9jh#%qWkAe9~I_cNl6Ze01)hifAFY8ppVO3}{li?#3`fOv=e&Sr@ z48FjH?+T@6*9Q|tUbj1*x&Vs)qNcGExJq9OWVEtja8;Ki>iQPyf+t}uJi^u^o$Xcg zkC@-8K9!Q9C38RuPY@v`*JR;&v)8#Cq{#Yooc1^vUAu2aBGItTWh&RCyfN`A&k1SF zV%F0+MOyI3<%5_JuU_E@GyP7E#j5mPzCho7H(tZyuT-95KuLEm@Lf!v>&7L{^dzu; zD7ol1o_*HN3%}&jUItlYf8@BlJR~vif|5u-Inw!6j*~AvNVGV&l|UC~C;ixSyF+hgMYGeHvVYrd>DoR_U}{7@=ty6o zulVjb(Qdy^=o5PkT-ttpq6jXDsVJh|riUn;$$5MSsA?iCULhwZe)CIMz-IN@W|qWu zBFZaxX$JpP;4aI(PJcE}sKOV|rpZwVsznezo3$Dsl)*Dv&E0fsvFFBJWEKoM4=zg> zqdiEh^&j~pDJqGoe)Y&dn0~@vaTUIo$m?9A7GCR6W2Gpu_Z%9Qn`pB*dTRvrQ3X7^ z9d-ue^O`N1=~uI*+Fq~CgZhv9wAH&@wsG+eMqWt2YlM}=fvyoc%!Y)hYpO~yf>{H* ziH`MrH$fg$vSK@LFDy4hC6ON2I4u+ELh&Ia<;A1$99OLbsd_O`zTaaHk|vGF-qx8M zt?`W+Ye`UWyXdNbnz30asSklTyjTH(YYx2XrL4T#hm@p10i1_A0K$$LnYVYw~@k`?# zHrg*wc`o;4txlqA5rN?`fRFUB%_ULu$Up_a;FlY;nDhPsE-g6^^We_9x zq|riW`F$#O5dW7R9^s`2(%yc_anui`SgvR@S6dIhSDpR@25L+c&BeQat7Y0~P-a`! z8pS$43Z;5}_*Qv3#}pBFswS0A)_Jp}IF^SoTe5OW`Kn5nKpbbzuFQ=G z5}v~<3T^|XPq?AI!gd!8DY0E7Ack;wKtYCLjC5MDmZnvxqAxjwe!)-q;UX$28Snjv zI3Z1)?h1~1s%s@#0TY}hV_xm`r5S3q6d-7EZbTIg)eDl`_r~xWGSFpO!R-9h3}=`s z1=)L&Vd=a=pu*;xRY+=e6b~0!KjW%j0LfxHZ7_b2k|g>IQ+<5P;%B^13~5TRb0j!R zs17$}Iu*fFER4>R)SV6`@i~o@ifP^>nDcfF`vP-jrNd zPJm6jJ+2Y=2Rf)&2*+XZc$eQ_x=)(QRx?o zDwVu&7TNPlNr@sP4D;uvW75*Alj8)Xx)rM2%Q@bSGAL&P!ge(YQ22%zdGrAfSX3UeC@MtJc{ZiKqFaWDb~le0uR3AZ@Ge}fs-P_sPu6rGV3O24Do`U8h zO!aUUB?H()p!9(8c;%#POtszyi`B))Kv9rk?v9#sOS&^Qd<)wWkDa(dgo4fu*zguo z7b~w$6Zx7~eiPCC!uE!A>fMf^y?PIg=og z$M`y<28F4J7E=5S9?*+$BHZ(UaNKvw{BS!au4BjlH5Cz?bANKUr0)?Lv~Fkh^MB zyFqKcR!BqT5NEE19N+sqe1SzP{s9HsYM|I9^F`8C74_<@pN2{iu}XZaos-vnk}7Qc zlEI1=!q8B*P;x)g7m`2%}l4AWa z(}BcCE;md~O^0`Afzk?Cs}J_5I2C0XH%;oguB4?77w5j@!SSlg{_ZVNE&ZNyT#^?b zT+}faSQQ+2A5fAZL96ec8_)faDdv&x1uepjb=$G(p|TXa?*j4RxkhbbDVHs&W8nd( zxfU{PD(qhUA6r5|2LD+i(!!k+)YC*H2n_>s_j%pu2$=wDi%6vt2 z%2#oc3$CZm<2{56C4E0SB`MiYk)^3nTo}c2Z&SwkiTDX*v(Ix<)z);+kObIel==J7 zxqo$McMW5{<7QigG!S^QZQ1iQJ6#pDqvc}DN5wPWm*nNVa^UHqT$Q3!JIK5^6B1?K zKrspn`S@ge-J$Ft3BMh)+KYnyk`)Ua15OW>czZ6SJ_}=>E~o=p`{9K$X&CCz-NZT} z7TFRdnj#o~cqF;MbK1CdUvc)1&3Aofy!8!i`Rn)o$%c}2OsNxc0#?)P@fevb?zL1A zuV;*whK!(z-Ukt^Ab~NrJ+J+X=-lhdDQ@9ZKB_G859&M9Swhw>o7Pvh7)tJ0Ca*fM z$&2&K`{sQE$_7oy@!3B8xy_k7!o&QmrzAPK@dtDh$4+gY_Cd}CckFEIuT)2_hmP)3Gp@_YJs}$#77NW!Nx)v{UmVOtXM@!!QYj2- zf<;QN_)`y1fK{5EU54-dOrM#=J0J;$kO!@!iQHTb+pU@6AB6Rw~!xz`ATH zfxi*52xw+Xqnq+E?FO8n?~5;d$8uwZzSYt7mA1~9Nzql8*9Zg_>$(4N)7UVooSN$X zcM{X-@<}Ob6wB)?Z$7#PgJ+`L&R8BhN${cgh?d&gQ z-sd3$FgBvM8s#Qr!j`wS5@GtH)fT#@nv0jO6%CgM^tN&|>}A;*c8OYU>jYG_b5(s; zofLg5Fe{D^6vkqb1YB#d*PS`M%(ji(6$>XwWR39LeTsyd3Y5rkz2lQ6rs+%HtJ&N%w>~oFTF8D@Q}CC}O#AV3D4?r2Fd)BAGjnmliaO##j&vt? z_Giyx3F_R2mc>RP0#zv%qFvz5NkfUvCb^fIa&4jj4)&M1cLJZ?V@)_uDNKi&DSxH8y)GGR{1!1CL6+-dRuh^ljP@pUL{=CFrA#?(c3o(5@#Ssx;|%G^M_ z-1_DF6YSv>LAL_fsdGucN@6rq!RrvjO@9wS+16jbOC4>mlU@O|wB3Wm-}utcssHlq z-2q|m)w>g1^e@0~^8nA3fZSL37nqoim=Z7Z96jN`fXfS40pQtdI4I^{h_Zs`S@6`a z3XvbhB>%h&7}Eg{c(<5Z@FYfdjJ2F1u)YSwe;(K}9D}2OjUzb(?6RvK@#kfN7dSX9 zm5WY`9FyEA)2{*(xF{>_NO$x-<$}7|Ha)JfCYmD)M>ZV~`caOoYHX5Dvi)OUFJpQ8Iy``Vq zN^Lhh^-7!_-nCPW(-quzR?dc8(73$xdul^Ve@9y)k|E0n7}(($m0uncm8dO61;p;KMbVuJq8=*W-M+spnAkk6ku^(z%H-tQ8bq~FgHIX9FfXL>35W}uY6{)UHLauQ3R%JRhj2&HHiCgXiG#YTAoejKq`FI z4>;YKywWvCQ_b=iKV~gj1e)2wTV%=B`n_zow1DXx1o2gu{4o5I90Hiln^xS`$1I<} zq_p>7vEL2eCi+{Z4jYzb23B;TN3)me=$Wc43&6)VbVs>iHjHo|*K~g~{g#*lpD8^b z%21vl&AVd(ty*2t%?cV`|E3jRB(}1vmNO&w;P8FPF+exeZO$#8#J(?w0%UPQre_H# z@y|+MkccX!8%8cwdk76(G@Ka|HTtv%MZJ;)8D<|rw0QYK`?TGWfw2VLG zA%-`15`F891Ox2m<<+fmNG$ohDV=7qc^jPvZiseiC@r3Xx~l|Hk4_WA884&lZ7yAk zK1k*9_*uZdoJ0|b@md_nv!IZ`QXu2TD;3?FNn~Q>z&uH|0NeaO> zu6_oGbW^O#`%8vMtZQ4wj$a1B_t0DLf(wdw+UW#1@_F@y0Lr;Ug$%DvgZQ1EeuysO zjors+_cJwn{Z;5F$#JvTs%I|JUvZWI+qNh>n@6wU$xDD#?4Rg@L5{B!1GkwBVOG5+ z7CovJqVIgOtJ*=Y*|zfRchW2%gv=CQ3IART&I9iM4D1!ju|XSuq1h)@3`~s$jE#U@UtfQT*2&3wpD(5-BU6rUeHN1UnFzjnJp|`@$F&R6jeo^G@lgxFy+g~^C#oW83y0ztNoeT zD&4o*pOYnMKQ;oo8H8n!>`CibLtHPnl(md^>JUT3L3ebj79#0wH`BYk_cGFAZRzk+ zk_Rp*Nz~sUtWm_mIN&b{FA9vLZz69p6LgoVD$G7zdsjOm~wO_y9z2dIq4jc4hMW47j!lmdqjvK2ZT+{@X$%m(%BYmpr*4rNMk zZ<(Ryv`?nu?$64AdrEZTYissOC9x|?LQtG1W&t|7!X`F0Ac37e4V%P&5VM;4xFJBW z`}J8qg6JiY)gMql1-9gfo*ZM%Hu*}Hm4TGZw{Yt%DR#_Lo&MI;Z ztTh`QRT~t5_9)ryWq7O)PPuIQ;9R~(ry(u}ZsxPfM-IQ2K{+d(ZY`x4qo-pq%ozao ztaU7h*x~rz9*i%QGo9#od__Kfg2MV9MXOSu@e~j8gxX_YqOndlcG=|?M%a{MezKqI zXW%C>-4y>EGQPjMHDbeW@uT>F{BeKIXz}z}ppFHAU0+s=_Y(cmLlqTH&c0#)dAq+# zDUT!`O}$CbEs+reR67$k3UQJW*oXs)zvT}YdGu%rUnJK8WNQv?H!6zhiWt~6)FYYQ zTV3^rsKH_8Wn-HrlFVut@@213B_(pqfb_nQ!dZi`@D+$!nA7aV--q=yiK#$~y=GDU zbaRey$_X1ng_x>b#0?+_sV>yYluCSywB=@;Ra*#FvHQ`{u?2Xme2Jfv@s!NXA6OR7 zHSKNQUVlJFzay7?wJfC@BZR6P){+m@&`lX$gN&w@@H&gzbIF%X8tUMNHR0akK)kc8 zWwRb-xtI@h&$tp7J`b}Zq6wz(apJ;FYGn$mcYe_OEdO9G#@HRjROT~-Ku=! zc!Phe%glL`Q(pd+q;N|?J9@z|H2cK9T0t)sD=bGovO7F8cz`n_LNBoL_{{i1Fq<9! zB$~CN|5_S#Lp_(uKCt2``5dnvnYgamLIw1jVhK!6Lc!0V0=ixv->?8X?JKnTh%gRoeN48mV5p54=lY* zP$p(b-#K{c+&jRwu?!zPOt}VJ5>20d(^N>0io|P1y=AJ{=XB*!M+Cffs!$?|DK`vpBxY>8X8?Y zJ3D;&WRBxymi_NCAC>j>Z2jCo$5*U;}y`RPIiZ>mr&S$W+)QV3<8u=7-16; zKI>rY&qFJf2(Z!r8H?Rag&!x8u{*`l6{qR<6t_QCkWZ=oelDQstV=zF0T`L*M} zB~KIo-lZR^e{!B=3CL!H&COE*UVi!OM1Ed=fA~vQ&G^G=_h`0L;>+?terG##fEO!# zrYOt^WPP#;t|@}|PmLBd?14T=-en_?{cvojFW_T`xc`@T{g;FK|D8Jje}i}Z7oq-t zrps^dFU5jBF*HVeA76RM9m!*U@bf_L`^U=?b04ZkK(Sw6PYsHQ-Q;g7H4Qb@`S9%r z@biK7P0DD^R-bK&yN88~x#pXs)n$7N_sT3%Tp$1F9x6+V%I8)MsrK>BUH|2tkyGbo zjqs^_x}*Nvi%1@^gHLhvRQXW<$M=qY_1$Czo&z;}{+oMHnw|m8|Ie=%=@#~wRs3^L z%8)-;T|NBk*RQgV$oE2mBhxvs79o{Ik%u?BTnoKKaQ^(g^XGrEUN_vg|1Jk4iF@}) z6!XaW;44Qi(P*ArKg*Itbg%KAeETO6R*PVU^USn2e(B~L!WTi=wVfxIkEmYrA>>fm zowz1sQG9Q%omJRwESz50?xT}+keH)Mz=c;_AFD3TRGe*W$3M9orx4$FP`^SQ`-@o8 z!J+zE3yIek(Zd@{OzrV+-c<1mPh4MGA+k=h z{?sm}6IZ_W^!?F=G^z@CbLyY; z9D=`9(9G%U_Ok%9{?>aNo2B$<5pWEeJD|UwUGy;5@76N(ko*1?b1&c>!v}|#=93?W zoh@yidu|S4bjl;kGKIX_sfNnEa zlhINtlef~H=}fxNS>Tn44k?bM6ST8@Uc5Re)BqE*!_wYb2>jJut zeYFMM1i&;rm+QCG3s-$4PHk;=2^`(p$A(->H!k$p`8e5gxvS`UPDWhVUHf6V+N z)nZ6x_E7O%J^^wj0YiQi4(*gS*0lf5)lg09>UUW0s>SrqUNLd%N+jpcU%ix$SGVDie$#P~&o) zhVNvbsz(|h1}ZN9C?NQ5+0j2rKkctH9)S-cd0sSYaj@^2N(nsGYLNbf-?7c5UQIzq zHz(QS-zGGfRrnNU6)o-4Q|D1$RpZ7o5B^av_JZX(W>6cvwpP&a&6bWEYkd17igMkQ zd2I@~a3m6dgfh1t`FY-)m9JR!?P7zh+U=Qb<%-LpJ?8v{Q*5@{<@((uAHn_)U4I=F z$MgL0!Z;gr6M}n4a1F8qw?J?R?he5r1a~KRf;$8ci@QT0xVyW%FV3BO?Du`1bMHC( z#~fyX>F(+7>Q}w1ibpiva}U45YVZ}EDzt#mbRy67BQ{kCT6uYSh5Z9HA&*^-P-+tE zjpaJ>+939iSX_>uYEUCCsw-PzJRUsbw<6&5?FdQ}?>dE7MSo+9?~|-P*Dsz= zmVCxrp~3NB)0tT9PenH&C#q-$XXdYwp<7mZv&HM`dVjBxA+LPs*4A;>Hs@62urmlr zrFREKNq=D?k>87M_$DG6pr#9hz01PAk$h?@{T6x8gkz?T@Jp`!Q|uCBCKGc&c0*gX*3ZQv7%V)rXq3w&UGz^?`DU6EJs(G>Zd^+ zu~=j&wa4njn$uo{jQGzP8-dlc2y^-yRYMqNed) zXW#ja{ry4nS3}CHr;V^El2g=&k!%O@-LaPZKXnnGdoOJALCGXp?cO(ZIcoKMe=j}A zL&9=o)CJx6yF#ld0(bD{#5r-k+i*}?H~ru9L+mAdYM8womXkN3V#JV0L>Ao39CnJW z6%^7ntN`*5ey$bvCCV3pJYtLrivB@ZUywpzKoIj#8b8h5)gh#}(Mfu5cRW91m9^iT2x*=_L)noI znFjYK6Q14(rr#+}L3J6LC&ISZeuwp*IhDHXoo4h;^o|?f)sUW@JVX(GL!H;EvNHW% zfBtl}e_CO+i&p6cD4)eKZa)ujC!Kc40F|!;F32Z_tvdP(=UiO>BOZu0V$Xc0w{t8Mrj_D1sW&)VI|`)wcZVAN#8;pUYCrbuQ%z7G%1PXkrB zYcr)leD1r?*CDf0KXO@uVsE0mSlnR;F#!7@m(?J*co4W z@)@lZ!uwMoh-y<{U{N(*uEr@wCheyAtE+z<vIZG3+a2v55pSP_i!Z zOchzKT}yH$oj@Xl&`it37+YjDr@;5AQOXtGy|78C;zGq3?0J0q|W zVKZ0lKC5t-*6{wv{_43!vXkqoPl`W9qJ={?h~yR|xHa(?y#sPcthpc=&=RbnHP>q zW;(T>C%^_%AgLA2=#M|@^tinRB;UgD?=7NF)8BEhs#A!1SanvkdxyIfFUU17np90* zf5`CS=z8wA^7YG;D6B8kF)Y`(@H@4+p^O>KW~icADO14i#Zni7A*gRo3k^)V*W?WX@$rDwF{0)vFiKBlkMfcSi+Vp>m|?Yq(4S(1u=Xo3&#T7-(>}X z`ok3H;I?;!+oqabI24Z94k7QjhOh(3om0tqUk;FxgZh`Qg;qf#*!*WcRE9!dT%^n# ze?1w=3#?5*>#?$bfk|h)NXHN@M+&^&%E!Q$WGPWAuAv8M%u;I0^PdNVx7fzcRycGs zn}NVVwi?8oZaF$%mQt!eZ87GX8Z@Ag)R-FW%?!sCDSt;*wrcnO_Tzmj0msZzt|xXx zuhR65?QBjliVvCoBaLjTRZ!Zj$44#OP7)DTV+>Bq#j0is^(%a%u4F69Wd0a}VDshl z2UAk@hW&ueA?wB@9>@Ke`Ofij$G#p|v-?%Dixh3;a+?;JP7M~2b+9&Uh!?3H$XeRh z`grVUuAZNSaRkwxDv-c~nidm@vwnHu;QLp9(fQJ1K3c6;n2b}G&HZB)gV~Tga4-}@ zd&05aIAS}7yd%IfGJ4E;xs966c_cJC(~TyQZiVPY!e)^;8)TpiX!_;PY>jI4d!{le z5oH=?D?LOzDP-D25!=%YKzsLV{ob7YkGd4`ZF4oYR3KFh_w7O0Ag?Y3iUEk|AmQmD z6@&cUM2H;0*u`W3OKt%gd#w?HxCy_)im!5s$GaF(Q`!{nP2!uy^VV;A_tV}zzcZLa z-LJ=jc}U-x-uO)xXe`RqM-tYsS`slb?eWXZ-yQ|q_9fbr_m_PW9SApnayiW~{q(Z0 zK%ty($$03Xev9Ax@&3&F$!j)5MV;;JXw{U+abh3R`cy6Hk3YWzfSY&wWp>3vD7h(_!#bzC1D#s#MNr_wEu{53^CHh1hV7fy`=zRf#=_-fO5+rW^*6Q0 z)1NUVs>R7eZ}I<}5Gfj8eCYLOZ>;%xN&WZn+4n%kXM1a4mtPL?c-6-2wxWmSaM2BLs{|)imo&~H0+)S8Ok>kMI4x(FNyeTM-kqAB}56p6)@qVmn8D= zp?NtVhv@y-eLe;?u&f+vnb{p<(Y}^Uo`QWPIpnR0!4`0x?j=`(&L?utV6!Jv7I0#S zq|V2n8HT0_kaa*Coc9)VY4HBd1+XrINN&?C3ty7Zh_?KuUzOzxf;|;=WEw6;FIZ@{ z%akaeJqZZLo|z?fEQcQeOMpas*KmTjJu}x-y5$aKHOu(&P9u0~u6`S6R(lm@Y7^XI z+lR1n-tA%bzaHTL(yuIUJK~j<4hmc$i-HYU_?{p)%K82h>4_*U`t1l@UzfeA#@k?r z%UWj4clj|>gM^nB3Pq9?>Pm8k%zqtIxG19qy#?yM_?c5C;wNAKGX_00U}ECzY-Q&6 zUZR!i=)3S6yWss%L`u{(>;Mq)b~hGw2eW#OXlL{kKd-tJe9Yo+{@d%})TqSY{TZ`# z^{dE6!M+g$9#N++{ieYf_FAm$ljz=Vqe(;y#9=7xVTTJX{d5R^{?imK`hAyF5|KPV z;Kkqu4gw1Y3~cak>*`XL7(d=OeaL^~eZQJNz^K~O9V!^Yf1bbGx)LJTgLJ@#dsnEG zFPo%#FHDk3L+0A`+&dcYlIzB857U;w`?5>GMl{n1W0A*WCxos)R1n-t(Wh~WdKXT_ zGl#)jDY^%}_OA%SZkPvt?aJ2Ms~2gdMmYX_tJiSy)9T^ofKa&xoaRFD zt8(RG@Oi@rY(CtQDKWKvj1_9KMO7$yc=BL zh&|Yy0;!$iib6bE$4e*Jckzhhgt}6>C6->NZy{$rh@Xi`twfuL)5F9J1zI&g>?%C) zE4pO8TOKLR98Kgo0uDL1odN}3uJMO3N-dC0Dv@aO-bArA618o#iLKYk&tD_lGSxP} z@obX^9<_5VaB*v}_G6CsKf{`-Hiv1O72+y%s)$9^PiG-%{_1bKIH&BkQ8{L+{pREqu&HVwbIdde_Pl8^b~MD`uih<_lFCH zIfIM-E%N-&4g~b?U!PiOAB2&^7aRZBf&YJZUq}Cb28_WPY3JUz|NiFxUbG#)$b|dS zh*U55?|=O3?lomJue^<*W&OXD?`<#~a^}G5(4T(#r^yuW5xl)Ok#AfE+}|3{w7nc( zq$8fMvb1(-f{muk{^*U#k4oSqPUu7T_7>2ZNQnD;Sqbp~K;^5}s9BMJhtn5>TA^8- zQ=`hG=H}_7()pu`8;ndR=Y5gsSczzg@*%l7FQ=9A^2w+97RMmS;W@Bt)y8y5!O{Vb zUW8Yd+dV#u?!YV_DJc_RMZC?JcmA{w{f86*<{mXXnoyEthRJ{3EB4x#Cf!Rp^d*zw zBd~W-z7KsF+2gcC7hy4x<2zBJq_j6#8k5tT+aLROzr0vK%WtUW`r3u?yf@+V{me7h z&0Mv#%fG4F0L4g?346O>1Yk3s5&%HqpN}sG^+>rx)rmK$Rgd)1x|e5a@y?6V-go(54o9@8?=~Oa0cClZA<>r-CD38Q| ziDV7(IJp=TK1i>E;S47?dSkjdXelY%n_#>1?D~k_ zm;2ksSAOKi9W^Qy=G{bq7Ae#v_+1>6mN)6tl*9u6z!9x7ElJ zgJE02JNZ*bN#JJ9Q={Iie9AMokax~E%TjlLu{M~HI`B4|xqq4wj1l3F8ZoVK%(gD@ zc`xE#&wbqlm&340^1(|yeZ7_3a?fTz(1nlFLPb9Qal6$;DsM2|cn|CGJYnc%S=BG4 z`ZE(*0Yybc)8!2Q+sqq2lPxjVoo{!T@+rFkz3T%tt#r~ev4l7S+pSI466b^nSaY^_ z;D_+Xr6z|6Ms*(P^b@KVTZ1WTj}KE%&u%BLP=iu9KdBG*_IpD0>nLvZ*RNFaajryos`yYD+7<*sPf17BB z=J}}Eu~h%m;2Qw};V#e9>IH}84@y@)OC83;N6nIZr(L|IQrjh@G0=0bL^kXA`|Zls z_`rTs=~2V?8xw`&V}-I&uX`!zJG-`GWr=J5aEND}Nu|``r~AkVihXEAjzti*8;Y_D ztq&i**L(e`?0&KTP7FX423(X+_UnQ%sqY-zV$jaW1wK_-%{MGs5ery^@I56gzmX2c zRV&qw^YskwUlo-YxU$@-NV%u1%*Gj1~5BpweHk`Eh9~a`K@V~`K2#urf zfsMCR7p1`}&4&7}ZChd_GOm*e`Ph1I>V48ixw{zA-udurRq92Z zD5&iY6#&Tp8RN=24&H_EFLFrksY{ zsgryGDXSR#p(TZN)~Zyy{zuc$;`VcQcEC15$Wj*LJ)CUXDG zwC6a*9w$d4-|OsbSuYMCfr&at_0wvC9@i%^wWNPwP;IyV?rd%=P~vd;FBo)v;<-gt z_e!U~>(FHP`=%*rBtDF7eJQH{&lEs`zn>b#*%{i|kJXffr+ve-ApU>8Bm*!3Ye5E; zkjKSi{^z5Gs0zJiN&U12rw`wiN(BU1$QJHly8;TT#oAQ(LweE9OZP&LH&GkuB*(4i z-~L8D1)|^d6PR`40fAG}Z2keb%6aKl=<#7Mw)_PB7 zw%#*=@9~6Pd_(wgQ1iMekhL*x(0i>m#`t%I$_%44K8q6D&ND*TCgiO{7YUv2kGhM) z<#~YgP+{}Y0q3!V46lYy0c^y?`Rb=X#6h38Qx1gkHfe8Ku{;5gF}#7@ zdQ(Y3Jq#su7_1>{Zb))2lSms?4KV^TmymDve9Jb~s>G8Wba!hym~Yfee|kXAg972p zR2g0u^>iq@`~*wLMM}f?c5eIehk4|7B<)Ijv-yuo0J<_8tJ8y;;fVFv(4+mZZgtse zm1!Jx`U~;+@}|JO&*W`<8g)Yx;zRjozx`uHM1U4xD~r@LE?vN&4KsRQZq{y*{&*G? zq*3&-xuYV#XQ>+%m&YbH+%REAxuES*Uh5JC77<#Uz@7^efTGvmZa8EWFU^ z^0*JwFw?VHYD)SsFW5IzWnU?4vou}0sJ|43%V4o|cd@@|^>ksBVP~KQRWVp&@%9+K zH>y*gzbZbU7WSa*Y$|_3M1?ujIPT6#0xvOFwgx?4+B?rFkAub1e8%yacjxf~tf$^B z9}O%3Bxjmp5+zVxo?z7swtc2rccLT5NKyF;QxAZ@jDn7rN{DGkWPfnZ{@qt4tGC+^ z^Q9h3`a@_sKDC6m_++|Imx8U)ms)DQGq!^4`vT+lln7&gcg&F$U{Q!1Jk4mAzg=6R z{8(fY0fy;U@Fof6mmKEE)Q_PE|L()hu@2V4Z%gXkl;exshx}ydI3|SRpJpLr4-P!` z4w>W!IRr{==IZ2638x%B3Lwj-2I6p7ebj5CV`5ScGS{F!N0Wi;NV1qZ)1gTf^wtII z6Fi;Rl?P=>=!VIMz)Hqe6?>;Cp9jgcpKbOt$fj_{H%dg=1tB&+?rE8((8IMjdq&7( zh#W#kkVBAs59TiSd%1M#q9+_WAKcvz_o|I|7a2Y#e??OfWhA~&$?UIHv~^BOo4bl; z%kbI-3BQ@Z*d80#}0+H zIzJt~$UdsNIoov5PUmmTo$q!hBVexHpOUG)PqM!S7w8rxN6y1^P7)Y(UX2Hc9-qc& zblcx5U@=Ye5!JrZdu*b{d;iJZcv;zzcp_dAhB+9Ek`v@m`1Vb%>EWL6{{BC`s$8fk z`BS+lc8^FztPYm-?PJ{s1DYrWLLOc_N}XqXCnKL|TCl`|fy4lv_k#B`{&Mv|6(f7E zD69iP{i$hN1(u|INxcO8sLD!a{lqxTNqYfxf~zoYY$%ebHH&tvXKCE zBqC9il&4zhxv}q;Pm6;{{baD;kUT8#V7kkeVr7%}ve!9Q{1J~qy~K7)o}6a`CK_#L zQrh?_vS=r?XMB3o-X!b*k_qJJSQ;%oA#;1svz0y%vNq{l8LdzaerUItfSMwS(y;yE z9!J~=FTIcIm|^q!XhL3PR*O|SEqA7&=RifF&!iHbDgs{4;ZUtZ2mh-|tp;dcK6}DO zZwjx@nQ7iP`8M}^<0<>pq#QM79j1&XSfB%CT9TvWafQS75EC*Q-iKqu`z>!8`zv$` zMAct02qIcTGbDrxL}4E=eY2=gQWjamEglyFB_FK^@Ggv=ggs?>Iy-!Y6MceF{CGRS z6E}hQ84v(TJaJYZzsGJGwjQK1hSL6$XmTKS zKhXfaGOvB?$C79eW-MgcQ&xcNyhNXnJ&!W|`=uMbUREld zz#7NLH(+RFw3*QACCB5q8_l@C%>UGycev~Y%Ow=4F*)aQJ&c=9`@;F6c{~;OkAYuj z1X8%KH)`PdBa2BMTBUUwZ`9)Z3R_ysb7K4=Nt}uy|{) z82NxVsPz;6)-mQh6{xyFjoB}z?gt-~#=?8_=z{g0CRcB>12Dw*$HgG5pfL5?Pa zeU}x|EGXiO5E;AyomY3T+jNpwip?mFIR-IqXTNC#@R#q%TC{4cmzZ<=(Z*-Q&coj`m+NwFgf6-a&D4HyI71NSN zzdiK$X)gXFF*+VSMOvdXEAvSm?NX&>22`_UYT(h=4>%GrGHF6M9c2|D*G3YN#6yDV zytaFzKV;eIiaH9Zz4+frHlMuqSlo*MU?^9<;8f}CYuRK-ET$!-e=9;uwlyl5A|);XD1lT6V9QFIUaVyWht!Jb$M1*YNMiV;o-J zua{a?yj)JkOtg~&*r{*y!OTeQKOJ1NDn34u}Jw|_2U(PiNv|MljV1^-TQ4DXA#F)XC5wx=<#uDST_g$ zjrUs>tf}rAoh9eGmD-8NV&z=F=gi<*gRrY%tcS>UsaQtllQykp!?X!&buHGkBgcrM za@D{Y+5Or?!cXFhf-*H$3)GfsH|d`ZTN}~vzx#^bC9m`AweaP6cYAGxwfi1-nLaG7 zCDKC=x;(h?8grhZ_S4I0sHm!zW#6V{KrFLfs8V zTJpVxZ-tLYzFqUljAHxJ?#_2bI-~TZGw`0(93_+^NPwWQLo14ehdOMFg%k4idPt>E zbJWB8M61r~RBCs!v=ORR-R@C(aGY^>7qm)zO{Osf5+PzF*FwE;C~_L))VE?gpAT6M ziR+)@Znv@Kxg!f#xe49OsrEXyZ(2$GWy{q9YiEq*nXf^;JA`=&o@(I^e7t4qgjK?P z$f(0w5Z#l`dzIA$*J?BE$5HZzPuzVRzTG)96I2Sd9=Goe9M*z_n##+Ks68R?|KvPg zoJ*`T&bMei_H{S@KE!o-YiTQ;;#4d_GyrtOL&u6qL*Iw z08uCLSy!y9c0TFTrvZ2B!HlOX)SGcV?_(Dl*Ma^^DuzLwyI=H2<*J{Y`hEtqhP-VS%YOCuM?Je zgzFW1W`;edB?*|PD@+_7mN+KXfaaxjG8dcW*(3;s;A=TtZ{*+hYi5BC4bg4w_K34k{yF-a! zfN``*MX9-N*3w4cR2c<-M;*IkP^PuLIY+fG9x|J?jsFz8O)urHS z$0AoA{WOZkCZBX+ih0?S#JyOwNPe?9NfR>ldEX2qK_J)aaZ3Nr>!MDd0NEx~|AU~5 zi(q@iSfq8M@#(z@5I=x&`F)L^;D+5LM!8sb7GsvX>^ORW{HHxU^(!HF&6pQi2t4dFOe{dP^#*Mv(|r z@dbB>t!6%0u!tz_8L5rOFgfi{&XQo$L}q;aX8bL6nox@8Q==U_x+tl_G8d_4AK@mF z0h&FgX0^3V)f9F58#Eai%_qvRFYbkNvFQcy! z?1}C~%PLh>_kB{(Nkq$Rnt2G*C9uiaThRq|>Sz&zF4z{@UYdG8efL=AOBL-!brt@y zE0|OJoXwt*Tn(P+6}p&_H(oeyQznoElzO^U7xHRT9^dONTiuBk&VWc^rM0_M>%+Cq zsh1$HlQqNpP~;NPvgKc4f;t~pKZltkAfRMNm1CI(6ip4LIy}$Y?>c@NNvO_h`oX_y zY1tW?A1JqC7A3Iq+Dpys^vtaPMgB&KN@LJ2B!`fHO?aIS37!l^5w#NX@DQk$TG+nM zf9k_pNf3Z*3Gz7L9H6~Ar}3tgh$3+iLkxlAtvP-vV}j)%xZ^WI9{C3&q$@rLv71to zGd!+9ZUY6IjZHZ!Zz#Fh@(jCgu*{3E9aPs>?(e@oajSRePSVU@>V zBi1kItzT*TyD0oVpePQZY>D5xOYj?;{tNz(2p@f1GY!vwGirj`to<3+i8_hS7W3J^ zWbs{)i=ecf2yHoGSQE~e)$OtqM}f9=>^!at^{D%a*lUFoJC4Z)K6jnIwV3gYR8y}Y ztp_+S#o9CdTjzTtJl451Ssvp0(ksEO@1>Kg@gc%}A{vdNMQbTPb#2e7C zI>Ww1PRFYe+?xV&+=S$B)FlNTE>BY`{_u0 zJHhu}$@caB?`oy@I4&l;o>mI}+#pJWuzilAx^&R)0!WS zlR6M^_S44+kB+rWE?-yT*$7-S&#YFr9CzTZPlBG~m^3mGF!0_DbK%0;;Fl3SG8FSG z>G4o1?DkW~Yz4j4{5%DlFF~8NCnLU8w~aGGSVw}dX;e5IQ(A9hne^sdl8}wI1X>na zkB{!_NmAjE5bpDh715h6UV7cj14H$mmrLcQ6TOXk+e@!j zXdJ-gq8icUo>;lR+98o@a^YH(M>;54M#O1Ja0=S}qw;j-f=3f{c6w^ml85h+rLw=t za}(7Ep$-VxOD@D7V5YN9MHck<#JIy>Ui%spfVF?DHm`6lK-Ye_M_^f>CVRfv)FD6G z=5K`2%$k&X1#OE~VND9%lo8R3uPkd;324ygil!jUx_S}?=DSacn6Imnh6b(DfwV1b zdk$oE2uXjSlp-@cQs^zD8W9li;r`-u_Rfsh!QL>zL@O`%68jS@6S%h zDErv(B=44e#8m}5Y43;x`&swk?5~)=9I#4hg6!~KQWGP9`6NV&VOQ3?gX7{_TZ+~+ zHXEYV_sD02EQA<>4#YNZPf2~uLU4Upb2>u>KcKTAB7l*YKtjO#&ua_&Y>o+s!JrBK z@Z#|V&p-cJ69M#a^bFd*kE;7>M{owdnXrooNm@eBpYm!M+VKm4Fz7}@S{kcoydTX( z*lxDcvXyBJqy#1#byBBYrph_rIy^+pk?bweouu@l7n%O33ih$P?+|@vNE;wFPooO0 zc3rLQ%YfT>Y?-Zc!G@6Ue>FLuX)#H-859yw9jTrdF+i3(Iq;W2k6gu-R^@%wG}wJd zW|OvP?fi5#zZ_@Rno)bFwNomvN~Q;F94u5`a^47uT>ShPhUICGeEetK^-kMYI+P}u zY_BF7D2lc2pB2KXy_!I`w-aG=>0J?InAJLd(h4r*WdEVLobHeODweZT*Kd`_u|Kca zsro!pU>V?7=Sk@QP+>TL3Eh*Ry706b=$yn*Gc#K8ksA9G)24=GUtr_q+O&=CJy-c_9t<(qd)9ZasD5 zhJ+#n7>;%hZ2~(~o#v-Rb77yGwrtZNm;&9O?nx41yFV7mt+7;XC*b2VOYU{gA&aGu zhzBd3#b(D-o9=Dt-pDr@BgVl#xf9)7g2#=UQ8O|&kAi#1+>eV7PB&vrqt9=&l_;s? z$(C8Aa^_3YnL>I4x!SWgf67T)HMcZvu2tevl&8j%$Sn1$>vZfH%Qx8D+qBkZc|_fB zr&W>52n*}3V9rPfdx?B#R}d$0(1IMZy3alaB&5uA*f zK0lz$SZ9?H3EF&rUc**}EdmZ|`9=Kp4R)s8-hp-FU9DjC^00z8Iuk0)UFuS}K_QVc z4F)6K{{)Oql7#+zy!Du(83#iyc3$4#ty~KkL|E>p#0Wnuf}_$c%alvu%hu%gH-CHV zjA?+894oI~$yI2Op-bX*$a?-t;bl4&RaZD^9$4yBQL#1v9KsEj0Uu10PE=K(SYdXb zbP~xwK2Lm^4q{yZaZRgpt@6R6PlIMCbW|@rdvs-~8vbD3DdK zuKvlA4}LJ_=LiCpca#1I@|8@rp>h2wRdmfH<4tU-xnhNDkt;#BUv|dC_Z_rZ#gG_T z3JU@=d#At&$W|-wS2!`wMfFWLjD8C;8tKrQHh*JnLfh1;R1(eB0~@`Lb?az^?=XS- zVz13>pD53J-p^FI2tKVNk3UNFfjkW`o4`A>qyq~!=P)v8)We~_V>~in8yB=07moN; z34H!c#CRoyPSRoV=MNOV0=o`uc}5Hd^vSVM(SpqzY5W8C4O{Z3{qJay$1$PszoKBXf-A5UVMvQvRRSI#0v74c-MVex z)h%a8E@kfO)T|cE9ii{&Y2%AM*!U5CpRIx4v(@6($r1;@MZsK{{=mn%gn58ry^VtN z3vk|@lKh>%$3rLb%ymmJ{liDX0XwIsMQj?4ZtQ>NWAqEa4@);%KWI%n83q#0xcgML zAbUbPwyIL>n3qCAPcU;$+#c!!QdBVuC#d zzaF|fs*5S5ef=5SCD$$^0OAOtsjFcqh*ACv zLzESST|nN1#L9>c=_IH?Rs6-j(HkFl5mL!+r>@7&K-NMqj)g`USOb$-N6nP%=kn!Ed;oS ztQ7Q}-MstTZwV@04AeKK^<-o!c-1M?A=mBMnC(Lh%JgPIMD5wMBptGl0GUoV-{^X# z(d;)9d}Ds11t$Lvy!XDGg*Gigq{9GQpwVDP`}v4h@Yay_%IkN|SE;)=B+@5+ej#Yj z=ZBcLUSm{%0joqn=(_V14jykAH6#lpbXctqJ=+|%Y(WQWusOU!&|Oa~Ti$N#Hkzv< zy{v9!Xm80lstG*Pa=EHv2~vCzcMMOAvz`#Q#mFL`oDuWoG1Z71F1WjE(?3!5ZgF0)d!}9i*hU_O|9c^ z`oBeKEsFfa)K?e)&j`Y?a_O?Uk<%1wq^bIIT&HIt|Vokii*j z$Ovf9qS;EMmm!u*E{R*G0d{27Nxkvx6jdptV+HxGXCEW!kI}K&NCTo}`Rn#m89xO$ zY|??B{)AO+&mroGn~ZNd%5sl6?AQ<}sd`v2may2p6ZaR?Z}YI^M>O*H61kan@H?f@ zUJFqS4Ov|pTTIp@B{4m76h6T0RB0c0pEZNMJj}snOX1NrjycI zQ>!%o+3RpOTlL0SBpavc_T|&{rO38M0vn}PMgDOw_i#h;5{30=2G`P=0$mZcw&GAN zrK-mMjN3(w-HGOAI`W5StIyy=*mOMWf)WL}1?*(%*Oddm%WK+!lp`44Y*yZi9sVCF&VQPmzb?j-0Ikj!l!1f))9U>17bq(KNP-T?^l$!IoBx#( z{kv2T0eY5Pc70R%|MTzu?>CeFJe4U7uAk;V#moO*_k zx8Z+XzfA!;hvUE(Ev`>GnAv)qwR>QgzH;tm3WKWn0rFL+CCypMd=okkZ$p~c`cvLH^lRou<|c0l^C9{8>Xc+ z;3atVQ!!~WZ)QfFkncz7M1czD50^YgBdJ(==8mHymiJ+}Eyd9NiBdZMAlSFW#Kif& zVBWA5UZo3WZXx zg#}OFjDhwTz%ZhTs%^jWkFvSA#H)SY?oBi2w#N}PR&JGHSk__{OMec-`VPSF&s@I& zeF~rJ`3{UVzmk~~-8I%hcaxNqG%k6uJ}OLT^Cxbu$nVc3oV9q_msxg=kxYMN4^B4o zMqB`3)Ck36`ONS<0az(SsM{F8*!1^3RiDOXe(qpDm&3}JOOH1RQYwR79~@;Kn^+?CUQ3mo2wQXn=`fS2HnL?ppG`}>@M zjOGu_sZYJV#OV@_7tb`y*WPaenZPTYVP3O!HkCK!Vy~)lW$*Al=4lhQe?7h)#Q{P# zc;49`)6*XYt>rolFqWtD&;Q zHB#Bsm$E4=k6#o4Clh@zHYum;#zc(g-H>&TNbctsxVTJrFtTXQzV^Yf*MG>a#RoR@{8V5o)F346(&PmUUzg_v^L?-7>Dj zX}q;BY3=rXb@P;eseF=&Bnf(6@&qLna8d5^xUPOk+=Fp4Kz2A^lPl*D>4)(CGD2@H zE{ZRXF>-qnp#pqo{J?w3ci(WimxKQa*LeDBaZ4f(aK*pX|3Iz+%SN_0*9Wux0l@(o zD*e-F%s+^}~jjRfyh3H3N$y-X*9 zDjPierFpI@`?~gOzN)uU=A8%S-|MFD{2L!jzd(ls>fStE|2av4b@YXxbGBDLRBvb?#v*>hW_c+*JI_{HC|gQIv|*Qg8a zD{nuFkQZn-yUAW1zRR4Zo4#08kiEG*KbC1w=o1y$7%eDdDdqO83;e9gtV4A&bZO?& zGx3v&3?=Su$|>uo>B#2Ww%qE0R35dVQ~|RowdpL$PtvQeoOd~YA$uxpx)%gRoP-i_ z{cP36Irt8^22(Cv=)R~4@3NOQLe(p}_!iJ|)3T3<8F_0C<3HX;m#BaJaqGh@e09uQ zFfC>|fXKOl{=CGzKj@?6`*#yho#jfmb{4I!LV<^63fwsp)OyUldZUkkD%LB*UgNAJZ((4rh8zuRpGvVlGTV?(<|W_(3JQm7x<* zrA5|G7_6?*|k<}H+&cP+tBy#Fe9+nd&)G;(N z5E!%ibd4ywgRJ0rus?S-CFX!FSh6yuGL?JT>}n)PuxQO}a8N*KQT36|{WkW;(Sc$yU3f7pP zJRas8-+eg=Q7+Q`eXst=YMvKLV|nRk7A!*~s4}#JT_>1wJ@~SjU;&CBD?Nd3o+6}N zegtKjqwWPm7LLm(DX?E0YEu1}N0 z_G>zY+AeaNTN1lbOD~#Huz|BVc?aS%T@kI=^#uSr*Q?sGf4vs@$KEP@?GsE8SETyk zEe0xtgW#tTx!nwu*V)nxiO6M>S+8l1uV9Biv&hxX--VfVVPZ{tHpdllwg~bOEb)1- z=E%(`MQ^%;^(6wN&SfWn*kPelrBJmOBw~a8O`(t7^>Y8!4f+AnsLOcO9=lL zRsQNWGRRs8gI~&fby^a?>R!5mYPjRF7Wz+%xyL&U7iMgSt&jfh$&aERLon-X%qL5d zo}+nOhCwci@p_I9sZW~;K`v=w{D@a^ zDoYFDrPB>Ub)uejd>Znc4pxPr06wRErwc_{T!E$_o*6eGRwQ?ITI;JVT&cy;;Xy4( zJX&1;F6W{XCUIFWiTpK|Fu2a;w8Hh|Yg283OAwC%X{02qe3yhJ*(Na0cLdQsNA0OZ zJd?i+7X4h-?huph#j+#;w@h1rmi#wCG0wzya>}pkv#_ZtDGpWOF)acxRc_#|Y`Kc_JHR{Ho-M09VCYA-n zofaX2yf}eUjhNlq5ZrBwCEquZwjuSbAniKTGtIJf2F}Bi`1aw~U`Zs8*2ixXKjuG$ zVNwSfsM~aabAotsdWeFgfbD(;^vsD=3>jLaoclw=QKi`siLDy3W^LhS6R6o&$sCrr zOG!bofL*06o{m0e>*XRy-{L^omz0vPHN>Lx`JRIF+NcKnL!ejGW6+=yreF zh8yAgBKh$LborN{pd+5ZIa6odtSM5zHDmsx!)jU)3Jk*0;j#eyC4);B1T~C19>=q) z#*o+jOY6v|aZ*3k0R3=?E^8I$D``Ll$Pv(1Y$C`MgxWmhFno6eI~NA7MXQ$t4ydE>zyJNX#C>8l+E4X?nLF9=9% zIq2Ph4V<^+SR0AlcS;!YR>w>GTpfhqCGy zv2};$a)(1%WqQcT)GyZe1TNm^zh7&`o@0F@SZK%UCc$XMx#s`v8Rz_osV4ZdN8mWt zh9s5m2kK}S4REh4Mzi&mWbk|ps(T0Ll(k(}7k1-x(|(QBoN4+CA?@}U?$iPMEEG|V zI=GW$2(}eWfUU{tvSAj}RqTxbX_-7>)6J?wI0|xr@NefQswph(9|l=9y|3jwgj={z z-I$n}1$vdxtzb3i>7uM!-shBo2@3rp?_7QvV*Vh~j*g4f2)y#*b^88%;Aas5my4M9 z?nrL2?9jjYfa;ilv=xS*97r#3f7hdLTYz{7#(atCEkh_l{@X?KO$cF&rom1t>`jbz zEggeY7!a8{$geS!-E8^MC`lfa@h_oZcj>%MG zCdD$vW(oScLO;@WN0$`~XzZ5tvUHJ)52}gkeY@KNhUm$!XaY<1Z9K-2W$vIA{?)6! zirwa8g11OcHk2sltVkHs)h5~|28oe(Gv#Y2XFi8B4f8JXfMGsQ(hn%k!)cY#s|UGS z?e?$~+d>6r$jLTOuIYET)vIsr+=COM=CSs`Oo_OZWmfzXt2!@xRxlAyQOGnw$NEt? zE;}>FI7|I%w3Zb@Af(b&_b{%>v1Kp;63xY3d_;>oXd~)4;_li94g*`b6Tg~FYRLo9 z)9I`T|Y|im@Rl6FKE3j`OctNC-{N# z`d4;uO0ewuF@NkjL#op?ga7^qithl(`Fxq@Ifqou$$3|jE_m%L?m=EF{0_UDIki<^jNsU7(n5KX2AnqdtV0)W z?HQFH!@74QmZzUz6i>T|bxfm;oikHD4eS0=gmuWg|7lN+ObQu1bkUU1ACXb`=?5pR z0|k0MRLE7IKj{By?<=FK>bigFk~n~f^g%#Gq`MEHASKcrA|MUYB_$z^AR&z+jdXW| zbeD8@cl+D%Z2%R%j&r3p zs;BZgygva{ZHDcQcAwTY`LNi|qNy4}yjkl(;js7>gQnaw%Gvf*d534L3z4ewi6h1* zX8*zLxPpj!#Ah|Fc^Khi7=GD)1X`#`oA^Ctlr`C-Kj*+`Z4>Tv2bIs*r7<4U*A>8* z6PuCw8A9FPHZN+)=)-kh8FNGEQL2Hr8&Zneba@m+N<=ITKMgX7DnAl7EIZ}l_C4U{ zwy?;t8?v5GP@~Tq)CG~B;pw90A#~Rpo5(8$M)2n))6f&s`dcodmur^~#;l)J{75Jq zUdNUEc~*cCy}D(xgSwBdRcGuS4(HAm$`a~P4$D(sZ^bh`rO6)ZpQP<&o*=QVjod_B zDYFT;tq0=43`ENFBYj{00>24M{Rur>;l9mkW5Y>+o(dmVTjzXWLW;VeLq>9(4FT^= z4b_NLqAbHs#d|XgMM0%%aM!EFWE=~lswqY5C>|4%0)ybz9w)a0R3&v#pT=>SwjiW4YJli@1lM?}bC4IgA zlb;Pnm~qe>8)Z1MXI;9eE47$4=iANj4Ixod;oNFZ@{vdkw7-Kz?(gQ^| zj1jJ|(c=@FiF6b?ja9j+pbV>!B zx0zGQgiqEE#MC<4zDWr&MF|^Bj9M8aO-qG#^f^%HyBOaQAitDEn1cvsNE_77g-i_V z5*3~Wj&+$>6+k{S@#{^PyeYKRb9UaY7f1xIQ?{F>78)|{a>bsUiDPgceO`oOz4%Q|=-^xqG=;s^QcVxh?jm zJL6(|A@Vz53K7)S$ouC_7gpjg;%=#%G5HObn)y!?+j|&_?A|G-H_Na?%i~J87)en; z@f-x3F~M+>J?v2ax?fW|?acoP(xo+&;PEVf*B z6&d9X+V@At@O=Gj`!j8Xdx=f)o#O;~Dg}(asu+s{E)NhkEO{!BaSf4p&DI)Z;7+S+ z$DMbucT6<)H+bUtwh`N&6UK5|i=UvxsE-+&t?jNG7R|geO6>To6Fam7i$c(ZDAT1< zXSu9imrZ9H3v%6z zJo#iN1Ao1jGxphc_in`P`1iFAtt8e%JV{sRn0-nRu^I3wDoLrm21W82;VEK;6qMt_VfG_hBz*ZVC0^a=bxvA z>w8=W`7Lmm=*w_STFp9y%5mX#3hs&8-8W&eX%xVDh94E*+7d$buxiB7-MM{j|1mpegblYEt`pxj-hD8yapm z-WRYXguY%XmBap2IRT~#Z(D#w@N151%}<>p4axPES-u!k<1MVG?J(c?n)pziSoeFk zKAe5j3}D%XU510!nPZ%IqW>@_F5LLoEWg~x!Fx@612YJ9qJ#=RpFk$RZUqV9rD-Zc zMqy47xUsqyiT~4A|*oy$u`s)jLj(LV8PXX4=MKlJJ3E z?f%RW-0#`xAs}m1GSmh$0MUJ06_35VXjgU$54zG_DPky+=LRqHvy+d( zEEq1K$Z!ZF?*g(cmcr$<`O*g>I9yIJ2L$rHhoP=C+hJB|^!_3e2FQ%V=46>YH1k@iezViM!y z)?eZ2cB-YB_9@})f85#Skcxs$YC48kIZt-K)V;@sM6elOTPI@E78||i!q>jBsxU;7i2MHDdq$+!li~NOXk523@_C~R5n9Q{_ELeZh{k=XHvnQ} zPu14I(n!56@0xs5O2n@)uXBmM;iWd|KWXULYq$mFf{Se_cM#-wB z2x>#0F=OLOiW0TqEL+`|&R#jKm;m75xQ)-w=1A5#33l1L-X)lcNJI(XNK1zl718L| zQ1>0%|FCwJlps#%BN9-S$91kImEA(rpQ=FIpm&yu)8;~*Hagu4g&&bRJbEiOI%qcX zS)L+!{!lbMHxdK+H7?-X$Q65`(`z3qe=t{3cA(cgl6MUaga-#^%v36lT;GJgv z;DkHT2)u&kdG!Cm19~|bwwK=9SPrq@AThUnyI@z;OPJdKe?Jc*6BN_dy6Npseb5D* zuBIw~$u%0!mH?8lBGb5wv9Da$)iGi~sJD&DMBd!>N5k(Sfcxs}yDyw?w-5C>tY>Xm zQ(4mgL7xQ#K7fh>Ac0WtA*@`T_X1zOLz2x>uE|j2b}p%JXgGRaBX?aLc@U#(id%^< z#nL%d1!gn1BiKPsl&5^fqd}K%xd=5L4X6a)qjQU>)QgKT*D^fRJr6*Az@)d zE|cZ~iBX-p5t@g_br1Kx`h4S$TyQQ2J6NxU1KCiVIth|0R>g`-xfDreQv!@8Z9@F# z=4>;vBBfNH9-q8V`U2#lCEES?`lh?Kr`!sgN_v;J#B%GSe&>43N%ZNZ&)YxhX!+gV zGRtt$f{SX!TZv`B^R3~09`oFb(Z*<$CP_3r=oN*JsDPu}>V%C{7@d;#h=&k>{m_%I zBS=$K4&*`#OA>PFELZA>gWh-Qg)Pe1)? zbCHFGLzE=Y&MILcZP1tXxZglNjOTHFby>1{-H0@To4*e|aA@|iq6`B?=*euEhWd_RF%k*@H8gjuIm||&@HCUS zNzvk@u7$Y|;AOrA@=gBw%YH0vGg zzfzc%VkN(#F3~FYh%6_ELoRe=HB?EuAxT`94I9t`QU&qUEZjMO>! z(f5Ig=sTJ1lB`UQ-o?0ku4adHh*6RK%$U;!hzz|(4B1M!lxl=`B@?dOU@_(VL2Sy$ zM;0Roc@n%pM(;;xL5Jr+1cFXW?WabSR5uiDB;5+#IfM}1@ByMk(EDsMuRxE6mY)x4 zuj5SME(2HS2~2)FUIEeOnvF!rX*PuEoIRtn=f;sh%taxF%XSpnM=E=Nq(4!j_w`X& z53@W}so6QD@agj~DTQixL|u5&9exdZ)jM@LqTy5tb$RHY`w{nva-)EReB8VT=<@4> z%pjLlUPYTrmzBkFG#NBVHDVxxOs|0BC+MePi*u;HW zs`u@6)_NL#Sa)x5dCJvH^UMLK&F}9Sf)dK-)ev(Kt>!vX(yQliD5U3sUWv5-Ch0X0GN4479k5egZO(uJK9hz7tx zg?H!Y=0_#AZK(#`#jgowAZ+t_dUR(tq;$`)WtfTYGjl>YA)j?Exu6|Vu~Ftngg8$k zuIW*e!Q_S$AV-9y@uhxC!q$7xSA0*Mj5|%$P@bhDngfSp`A-E~g2}uxx?G0z8<3BF z0|5Y$B#xI*JR;YdCxC4IAV?UN=wNw5!kyfdL7hE<;SPUD(VKYfAzddrmCF3x#a<;? z@(ubG<$M#P_kk?wj$3AJtzbI7*fLGeumXcY@wWz0^Gx_bc+e@BlX&Ao-Zdcuh5B{! zOT$IJyF^1j+t3m89@fJPKrNb~TJc#NXE47}9qc>nIw#W|V1|oVXSa!Ld3>J|=>>4^ zKcG3v)h_;eDzr2Iw|F;8zJy;&-sc{bxSGSDdAP}Lx8XcaYh!IVAgZPGk%jJ)(2!*w zSgx!*v-bYZc4Zb;{y+xD<-_ifTYk73{<*7(>RJ-XoeO#J7`CdTbubZsWqvt0~K?h0>o3<8vgcBw3q1jAW%2N-Pm zNawppY@Waf_*9{0G8I1nh)d-`LpP;2Yjk1e%Y%yo}}} z=@iNzJp~9liJ<+7Adeo3tqUtg{X=96(W)}+J~JoHa@@y5_fuH&#KVw#yOD^$IJG)P zXma2#rj2y^u>7Ao{;&=Z#NrMd)0Vn(MoCaysW~EiFy3Ak>qd)Ac1ao%5lnx0o}9A! zKBOhm4DIR>1P-$~uC@|0E&aV1C8MSb2%J2AmzaLCO1d==eKL~CE`8fad76J zXIF#2eB$Z?tT1??IUq6hD2&my7dX%7Xx5;qs>VpPl-zfdeR3?m%(# zqHy6gZrbfvV1U(f7lg)kJ=j}6Ag@6{G2U6T{H@2=N?6ytN52~talNkm zyVos+8?cAW%^yQGY<{GK4ZeOKNdy1@j*imDd*PxWh}RiSafz}0>;G%8E1W;G-ORki zhYwnW@u>A3iN6^OjQ0!5`3m<|hub1dUK>3e!p4_YMhUqviIL$ zaKL~aRm;`Q#zP4SQNmpNlVinu3rPTw`mX^#<<5A%|Yyaurx+2Ww z^$ETf^fVTiA+D+b3H!k` zq4*EgOZz6APwaY=ZPo;Ga2+OF`+vYtf*avlH4mTsf)o^)3?RLAWdrbC6~>>)Q2^Es zxKtsWP9r#K%lsn~tEWu89=-Fue_;GTC2;ipTpgM0P1CAkopBkxgB-b%a*Ehj9QVua zLwW{WQ-RhJ$&?J>fYCFBOycRdRg#l;$?<4Axc^qkUJJtjY>AbWMX^cC&|5xHPBoaN zI%A}vl+E-7kkO6lQBQ^#GO(^`*N9aO51fJ81Ca2}X!3YT3VEeI*Emif7pT}zIaU!Ik_Y?58k@oH`TGf*M?OOZS9 z#R%g+m)imYeH&^@_=m1!o$S>xqK6lYPa#uZjKD3e<^X519N$<#mTK7 zlR&yfGwgekYuR*jSR&w_%e^L_SjXYk<7=r~E$9Lhgowma(5a@`MtiBzIUC87`Ba&Z*d#Zh)_&98l^Hy`I-DBoC6 zTAAe@JX<{*rgo0r80j2`LjpYlfuL|)*vTO-P|z^I-{9XrLO;7erS#d$I{bO`y9^!% zCLRXlb>rP%1+LQAAgE8ufd&}2>H*5)ArL58sOg~F4S>K>20%^HD=*hb|8D#)5|RrF zgb?ehGgrM5l>llo;7Q~8SK~0y&{ELef;UHS`4J=)8OcR7Q@FtEUyXyoP>-R^fopbe zclQwsP*dt&%KlZG*W*WlgM+|>(UJcyNO}m=R6yKM{IAAc0TM#h0~avg?rwY^P*c*t zml45#HU9r%GU7`ohqPiLIyyRieSJc5Rywls&8^#S5>tUQaDvZ?m6g1}JsR6|!<08? z#nl=Kbzc5aKjI}^k-4I=z95ne&KF3t zb69n_Z?%oud=ii;H6nn67+b0Z694>CIx?BU`ewi(Eg^wa{(F|nh86DZT8v$_ZKCd$ zp|fYN`BGR~4{=>CGV|!@l z%!NowY~Inz2&g)``33zOQ{O5Kj@&FEEP88fGvGu6)w<&Px`1vJ7hChN;gnnA4=X^9 zkjT@WKF!Eyt#y=sry#cD(M&zy_Oo7RG@^`dx-s5IT&@LT72#@md`i=mrz0^|zVHyG z1D6Bro1KJ2qR76ni9~tbZ3HcKZ+ZOrpN9V7{Gs>2I3>K-a?u?4BF+0=Nu7aCMMtq6n1q79)uQ zsjT%m#MneT{Ycpz8Q}Wv@Gd5bbvyo>gPiK|;rj^Ng$RGgTH~0P5U9SIirmiTiWyIp z?}1uesQ&%w@=fUl-y4?u)3pyx${OVcRq-a+w+_+YeVM-9Um=HX&7YVoCc3fa{C-*a;rO)AekQ`*o9%FYV8rb`Xq^(mjN zn3mF_VU`EmpbcM9d-Df2s0a7L)?Tt*L+6|E}}mXcKF!| z&Q{lVk^o^!K0;W8{lJAd2L>JuIXN6(K2^eyN~f27lFkJMr{s#9J7Y|*Caa-eR(<0& zFx2oTQGI7se$=S4#8ZYoL6KE{yggQiAS~T@YPT~pLZXfv$j4ezfXa1icDq>M?GIM& zdBv2-0I5JDRnVgb4KEiqu*w2S?(>`s2%RlA+M3Si79VgGbW__njuRn%CGBk2lk{yM z$?&P`7@~Sc!pCk97me#c$oPmRYjB=rDJAhh_uXB-lkg?uirNy%Cz^0lh}IoL?ifCR zU}Cmx!EpYl5N$JXWZtKx5jYdTONx(=|Ca2%XTyt)N*Yh}QC|5BMPib2J~r#~9!Z9G zwZ$n}Spn)5_TN}mMvGo3>%Kkz5*U^=C3yjTHbZGMjumB^V`tx~5o2cLxKI{eAI}e~ zU2_VbpLVseXH0|y(>fR$)>oJ=j>?dK}IgVSO zBRF)lWNp&@Lf(>nI7VG0Mm*h{9D{hwM>x)?^=ACbi?eZyZ zn>{sQetB-a@w3Xiu8{-}ag~b|xDPxuU|K&?=FK$xLQ>JY3kQV;1&lLgLiG zq8I9pvm<;@v1osuegFRTLCc;>=UJ8Qa@bxV)$gEqOC@fYdZKexT$!mIxA1=XQ={{PM#lz7`b~Hg`e-=)?_TzT=g+3Nyq|69wMv_ zaW!82%(lYcPGUIgotamKv1~*;DN;}GfER2j96jV`pgiChf>mhqGgW7J@UbO{FQVXy z0cEFQ#h~OMuA)uE^Fh-W-)L;7od-LTne5wA&<4IxxIE`Y{4s$~K+sz^KeiZiN^_K- zuA@TltW_G-#vvOSGCS$V}!SR&9&wR;U?v3lG#(y;@ytnkCXBlH5LtGe{ z;>5q?MXON~1sH0%`uudQSLOk-*+-nJ^QnB9L8_x|64iTjoe}q{oXtho`BZh;GN05Fqaa7Z$IqV?sL5rgMYZiKy zL7YuwMZ|qIMwo@@UP>U{`i#9Jj+sI7Xkemo)kZftLGp*5@&xBxzU9{riNOiN8Fb>P ztku2E86f(o8T7_Pz+t5O0kDARm_Ck!a#N9<5b?}X_oET!>U5YEKY>?-9)iNfxwUt$ zlH>UG(;SkE1OF{?lUHwU>sz4#5Jr8ATsMVMr|p$2-)~JSz4sRG5&8`2%T_H$sVo_e zN0BrU;Z_p1Y_DTW5{13vwZ70{nw}G0Xml%55RwQ?W8Y7l(r2iu(zC{gK^wN%8QKZT z6h%z;Ql2jZP&_dUM}<^wQNAy^<~@ zjDpNLtV1KFrA$TLes4q4Xn4;78~G3}qq&O!#rfl)&FlLid`gxo5@G`10+2qBJRaq^ zy;+L`I!kPrx`R1rZX|GUH$Tdm}f|@&GV_};k z8a57us@qSo^ZZkd>)X4Exd!hzSh-pYek$1%LOuG9h!5!Pc7$sqIzzOq7&lZ!DNmTZ z@PRO`bh1D1F0ZWY1gsVcyJ$J|ao^|$gabu9?nLNTy?_$%${+OT#q7@7L+cL-z!ls# z2Fm&*+pJ+Vf^(q;Z9-ANhI~z8#CUWU*eqdo0J9@8At7NhqYe!ju#;o{`cQo`vS`Cc z$v1Z55V3SK<~di${zxun%Ch>;m9pXT(w)kJgan4f#0P0hA3c(xCMVsThCFOm{Lm}iA&FFX^x{DFJ7|XnVAp3Nm(4#cG(*o&{kGb!ee1! zc`2uc_X!6tT{-H8I5m(t?=aFtr4k@x1HMkwjc@)B}7S%vGnXr9%F@f5Y)G`@NQLHkwl%^p0(0ZuDEDNOyVs#T!z|1xyI@_78c_SUv| zNrvG&NG`b>uq-!?rC&|ml>^q5th&c7GwQM=7Z|n=DWPC7zQdKDgXe$kP!c@006+l0|0QH$cWH8YNNPw004}( znXs_Dgs?D~yuFQynWZrRKpo>0-6{162KSqQ6!xbgp`7gQ?353^=!A-B*r>g6-j?Hq zW?46`gxG9sxNNBPbs`w-oVcHbe1s7Egg;NugR-Rt5BbYSa?lbl?V4`wn||-ev(Z~1 z*x%=nU?6DKI$Sb9SUJkLY$q#=5JpMAOAjIIE^5rMyjHY98r4+9SB>AKKh?io9G;q2 zZ@xYClyZ8fh@gC>y;x}Oj{SjTeL(UHtDE+Gu@v<>aqJWZKMVK7+U0}v2SVHq>=Ol6 zNrk&F^XYf%g+8_nt!3UOOjEoH_jh$)qZ}Dy18uSKBd~vsqwDu2-CM$`)GN> z@&@JO(7f%~YVrbmw!RSjq(Aya@zLP<;TD*>I>{fdlGQ$a^{Umjg@*YwMF&jcXx%kf zU$8fHYf7(dp|3x`q$dWj1O}b+0B)>`;?Xy)z)a>y_Aew1m!u?dCQ4N?U7b*Sa~Z2i zn8?TgXrXOn08FqM0Qy}3V4!b8=oG#tuK?)Q%T@%lj-W+~M-FPjE>g_PE-%Vl>FjALlo&s=M)UZeJ!^^aHj zv%AMkY4gO<$|M%&C$g}JR{}rnVjwoIE{r^xp?el*K zmyP`H-8=EfH{9>UWS_t@KW4&*FL?@A{R_MgjsaA*&)yGLITN1*vC_7X!{jF^DIs6a%T@^z?*~>}$ z`-}Q;Ml7j+gjY`!#AL7A9gG!=fK}bT$d^hk;6Oo$4KtMmf0GUfL3sO@>VCWdf;o{! z?{;NX@O|N%QwBwuPQqX7(f=h-4Xoj>uqtlXT#v_!Kz`9!BS;B=idC15o|p1^Os#^PjF@j zopOnCb8;o3-hAha?+4wMPpclg!V9gYr=x~0YqO^>SDI$?MI9sQALX7OUY=u&QI>fP znwDK&KH6XHF}q!L+n6r6SFQaaNc0Q}Xxw*;kMK%dSK%z-)8D$myb|Yx?PC=TzcCu+^Ttc}fk4yfd_kL#OXJf7+<<T6e4ulV<`0kTF>ia64To$kCIKE zk2`*j6FZxQ;o_TA+8^d_OEhI8Y-Rq^UYrybnJgp;CRv}v@L@W-`NvkL;M z>ryjKNaTuG2}=it30_bS-Svyu)09*~g@zs<{LV-U71;gke9c(L{aAOh)kf~J-K}TV z{$Sdy_T$$x?Tb5{D9QQ*<%k^jqj|ggtK9KCm1B*`Y6p*n7UNX(Bi`P;bUv@PxJr51 zzVK%a$#{l*Cv%!w)3KOSFUWqc-stlqn8Z!6M5&y7{OQazD1v+K;d1)6lM2Ysr+|u2V_D%NM6XWjefItF z{#$2(#40wJv`|xCvm!LyYBe z70t}`-M2sef{X)JP2=}xz8y)}YM3qczkLV`3AXu%{lGUg{E-8Gy8(uh51znwhFLY3 zSyr--5iskL0zRw$SwdN&qX#vPgZ9lY-Pjk0;1}{4vtMEOu~eD&73b8xnqDZ%S>?EN zD$~?UJtrt2J>%0k; ztlJf#T)@uM%d=c(_23BAh1pa^y|e;_XlDkT@%YI{g(~@MJ+~O-p&CV<=kd-w)lA=s z-jjYuG>{A-yEU1Y2YOs-3*!6U3`i@!w^eM=5EpCqB7VzKi)rQtavdL;>K| zdLxNQ7#<2CTy!zLl)b&R7q?7;nZtXc&Cqr-bR?#`a6(A2h`v2V-2Qf zyERH6UkG@cJpA@Gtmsr?@w&lYmdEzFSO;CJ^9e`4`E1GC@}&lrr8DHJKHr{T%v8RV zSsUXvCZ-%9!u=GZ0Grw2oaG0|%`xXvNUnUYg7zhFzXLMe7|;iZLqZHbNp~Mk%=9sy z?15X;WqcX6Y<)Reu9njAx_8=aU*iuteL6ibblzN~*4%aexE10~B6vc=^4n8XF-qtu zXh>Bzl6|+Ew`+DYOs+G|W17pL5T%PEOC|n6h!Gxyxt1^Zx91bVfO;H?RhR0etG2!@ zn*Xf*Ycdxnv%u-HVfKza7Q8+#i?9L0;L^sK3bqBHY5=U38WW5yUK4PEA{tm8!kA5r zR%mnPfd~$l1@!L+?<%Q=G@R;ROlIGC4AE!WuGlQ(Y1_U;Wq5Ej5nJhi?D|8|qS;GD z8f=zmVhf{K!P!Mopl7|`IDjd9laX|T$m{(%&7D%Z7hj>LQI|)zuUok$p>PfQzBIZW zY3L*z1Fqa%Ot$MkMc!P*t$KMRKiO{koOReBRNhaK3NVIj%FAAwFC5IeVxr-HNI*LA znrD@3y?kuG?8C|W8bdyrr%sV>!mL_(9^$w+t;3*$9!H~d%~kc1)n4X!x-cr;d3Q== zlS)H=kG7Tg?Ghg$$|2w%cMM?`KJ}fSKmYJ|@Dsub^Z0(Valzf8pZ7)nUc?v+IbI^~ zNnX@YJp)T{jRV;I=7J(PDhc}u|K+}BVXnha^Acryl@M+O^C=+4wf9vQE)3&Bo0&z) zirF9<3^2XMYss1Qh19s;)5Ww5@FCgtV2cKaBD!E@L_wMz6`wT{1igRMeiO9)(^ zs|&9pTyFbdBW86gy|p(* zngfLfq{dVy)ips{L0~z99k#=t4o^-iS{>^)z4+A~OM~yy;T%4Ue11 z?Sp3&&>sSsWl2GVvgBpcnctRVp`VY?sZnPVjahVFpBxHp0$_m#$=%<> zxQKczlun_YQqm9Z(vue~6#J|h^#lPXU9MZnYaALF7gYfKEZScNmsqYcx=l)u3sBW= zlSAWa=kp%l{TU^N#0YX#)_`ia%>G^&1!2v|qOm(izjz>bfzGag8PfF|g6;@-;k~cy z;Ho|W5$FOgz>sIH#>!xr&$j7~vA2=o8 zV@aApI`w|%^EiAOaog2GPbZUpsA5C?^0*Xco@Yimn!*_a0|!a=;lfAd-P)TlyqLxT z19xyn`{oE+_0E0^^*Q*aZr8iRYu*yXXz{zO~1epW^oZVIpdP*@-D z^-2>Ofu=AuLjWuV|M|RPjp=QM-s@a`Qu?Wav1o@~SUapV#AuJ;aNM#QU%@ae3-tLq zyIOQ2N8R}cB*wMs^C{AgjFp`a1$y5Fk>i}=9HxRsS+eF<9Ikrt0;5}$FjI2wBh?8v+0K+Ym#LWNhs6+bm#_W392^}B+mi-PWx#;gBCWgq>dZLmT* zB>DbYET9P*87nkJC1Y(s9sc~MqPA2x1IPisryy7t`<|+ke~sFIH=>l)mv*#F9{QKAK%DWG~d zTYQ-f{(EBocc;Gap3t1V3+dha{~?Xvk*F`C-_dMN8SuY`CjjM+OfbL~C@%e1SoTkj z`In*#5zribp!jW0S-9N+GcS}LWB`$s_`U*3hjwg^Y2c`qFGH3=2A*kD_==-Pyv4BTilLyYdoJN z?;71!eC$45CiAPqa=l1CMScKntwTDk_`&yU7;c3#L?%lo!9T60jQe7o;xPIEoqedF zk7=Tjdd95li0B+wM&g=1OhJN+cjuP>nAT5PFiU!2vc|k;L+NGKTIc;`!br4F=Y`8) z%;uQK_Pxn0yn4Hxg#4F5_|dIa~#w zg?vfL!ZG4J&u{h_$x3BP-EYQatpM93u!ev**~e1V+~WI4ulYWH>cv^TH596_<6hj5 zGIFNhUA8z9(IzVi+va;;Noe+oT^`3EPHmehNqB!(RQ!0uy1L8~ZF_z$%xjCam8A6G*&Tk|wYArj%Q*_FOFnH|nTpTz)j`9J{k`#u<9n znoZaFOS$6| zgTtB;?C}UGc+4M;`@vzQiE!v2GdAA&Ag)rTX(XCcIQE9|3GpiCJ2;#fLgIEL6zjbA zF<{gsyg}f81gA!CU;6%P_;xC8{Pg*bPQAuFqboqaV<&jRR=nBXIKWt=)+~7~B_J3R zU1!)@FYo5MQ754}tT2^NzMv88LoqXOTz0RBaLc!~8q*^~%>Q^qd`axH*kD6J>h)}0 z^@epSoD|Q-A|pqn0H=Wa$xx)0Un|hZ3Xb? z6COBWpWLpR#*MZgL<-Wuo)G0^rfa&XU0Vh|)U}xaw!Ps!;NI}fsxaXd{*}`*qd2ei z&{wzX2%pqrM2tg<@0Dy#rX8TmY8s>=R_6_Tg?w{k2k#~UmBG3p{z-#C0zY`W5$;9H zc9bLT^@~xS4^+y zt>Hwui>*P!3u>L5q^lFKc3)I7IaOq42shX@U9DUcuek_)u6zNj@&m<=Vy?4OIYeTi z^H#98ZsL8>Tva67S0z<`Hy@=lrAjZii=7zg*h&e$gH|8?&1`mqRGTFe=_%_EphX{6 z{Jdsr3KGKibO{x+9UU|!Z5KPk6f+ho@z)j{DIPrwKJ6#jpLiZ-n$(3CI;;u_u)_`v z^j~>s(@9?}*;BVKeOQ$I?D4YU7TloSZcv`a>%p)%oG+wY zD3^|s$mUum+peL|T+$J3q08^FV`=Nd$B{Ok3Bh+ zKsN0yZ{umG*m<(p8{~O>_QOb`;14!~24hEV_D>H^65`^~1*540{`X;d(E8ZL5YY<@ z#TTku`#SeqB zpj8M4SNvWNZuD`1=Y62jQwv7T*X~#MU`VBfZL5jDD;eaLI9f8ERuZpWj_lqBG)Ivy zq%IouB5hll&gVA72P{#pEV*3{p3GGg;I`Yy8yvsyr-pFuOz`fmE}6cbG>Ck<&A@~| z2*@EgBUDQ;50y1h6->Xjn+03<5Q8P%`3sNP4W8=Ktll6&R%XW+f~s{Wcr$mGgaJc? zs+m!-G&lu=C+Qyf6_MsZ5`MuSm+v4uaEG2_Y1~HL%fD(|ZuuQn_r^OC>~v)=(#7gf z7`2+HZ91PjIz}p33dz^7;wH2tIxOE#=$u5#Wu~R29Z(_&w%x3YJf(B;$I+=yj)QQE zncIf&_S3#b8sc)v>Ac(qtFE3rA36b*OTR3n@i-j3G2A;JqDt_B&cc+Lt{JhbRfY$R3DAGVn5v@DVD`Rn8YE-rOZnqG^F{=N zvRkUqA&*`szh`fwR>19j(cO45`Bo7pk~ zhb4Yp;%^dL|n*;UvDQQDmKK)*Wv>S6;mX5BG@! zMD~zM0$r?^rt-S0p|!378}ApNd1=a%mO`E)-8ETEp^Rfq=lJJ##8euh`P(}j@Xz4K zt9w#}2$)Qti+sngGcw+1({fv9>l6hapRX^#BZA%0ttYHOL}|5#5Lv~WpnAGa&yoE~gHB0BsiQA2E`Rm-qx zCOl%Co}Pa;S|8x?#?fp}>cNqe`WDkmFhUt70Ji2V7SY&`S?^bk@??bw{*-_nsbLk? zDyxnGo)mSrN^>vk5aO=z5#X_wV(RAp`s^tR>2`sy?PB}`#L`QC-1a5m>j#9gqUdCV zW1}UwPR1L~pg!M$@*PdOzJcE{Jz_GE9?r+H5j+QO<8Z=xr`jL4hvoWHp6=|dW*V9! z-z&2TsJMRDDhvvgIpjnW)$zDKw4S^^T2jU&<)OvtljeXh=yrTyEf`DqkuNEp3G?yl zm&3Pd3#I<$w#4hVRa)`)>~wPo7K$Z~5@xd{-`7}0=tDiOyV-nh$eXhq5Zbr0`$Y%H zl?Bh?mc@qA_BpE>xn0^4U?3FR3#Gc8%_Va!&f6tQ#X7&Xs&y;F?C}*V12~Q~2}|E| z(QDQyaM;Y7Pkx3V^@TPNYm`X&s2gGjQ}s>$qTBlI*V8xJDR#=h;xrkqlnbdpoMl^{ zz%;#j0IgRnWi{yaVr(x|TO}3?h>DY=Xnkk?2DhuDWH{77&ucXJbGy$z%%R85ztw%y z1Y?A<>&@(H{P65@&1pcRK)k5TK`M*)<0^i~5;3FXF;9JhJ>F1?#nDo&E@bw#qRvue z%#e-jw1AGWNOPIqyO!^fEx8|MGq@-nOh)iWBE`R1#j$S*S7dIOF25eEFulM-kF4uR!ch4^j|qZNa!yzHcrD5qmi!X~(5{i9s$H zy8Owd^U)Kx)WwI!52r%lh8|hGNJKBJ)%a|RY1#ewo3%3glXPNqci+m92++G~hjRs4 z&`FEzPqqhm-c0N%boCXP zfAnOp>|;}MaEgJxAB;(okPIIAuI6H@&T`oR5y3cVWJ2w=fJZ&)qIpPFtJC=_DEaY8;l*(E^KmHQGu=$QQIIBOLk%0ztl+7^l))gSQ&M+%gIcNtk zVoL5%GPzHyc_uWOlho@Y#WOqe(loSU&}-upN;j z!IK^?1AywxcA@v=C~Db{T_f(m=cy_>pZ1~)6{>o^~zfMLAAFku>H>Kx$9 zZg8iLr~lm$4`G;!)w^Y33ba-k|LTddl^su%0iRLLMCbJ}wX}<}%K0bPy)|s*xZR|Q zq{s&rq+ckE&P21;Yy|5FZuPR9zFk&tB=O&wR-~DXIW9$(Zza+0%Tu3i&bMfzbyMTz zeIDGHd^uTtpvct4n=ux>ugGEqpQ&`AH6`j{e$bI0&)_Z`lJ2Sk{4#q^6EVrmNXfM9u?txUB>O=U7IaNX-9 z#yeIN(s2%?I{n(i>zz)@zG(!N7krGNdiRQ|%~D7r*!TFSo74zek_Tzz?yURvjGh>l zz%eFAKpaL=rDnOc(Zf%m>H|rMQnnX18_KJS$M@Y4&>5+|pE+l`M#f>G#;uihj&3wW zrcdxmK&yUFNqU8BL&Zqj*?wcE}?kTRz z#mB>(%zOQFqQ1CXF-o(;N-YCz#e%A_5Xq$jzE;?MII7jEcX80&QOIASp-yZUOM?i1 zOx3Knu+Dg*n;xil?N(4eqPq)C1@!FgL}J`1iw}d^~Kh z91ioC?hs$}y1^MY;q$S>%)n^kwCg16L+WjiI`EFaP1sFp*VS{?tmx#t`^dC>eSgFd z)$XkF;79}~?;KvD4^OL;0SjUeY~|6~IEOU3t}Cz_L|9z!;oW`gWnC_J^*&$Ni`Ib! zy>0CCt=kGX)G!2Ga64bA{Wf;{RTDpt7Z=5k4l0>Ap;TJ$7cYBiCvqN-R-&U_e))~; zKyqNYtAfO8ZwajHtprst5T}tu{LIJ zf@oqpVg_;G5Th=5=3$DJnhi=0z0dYvEIB6gH4ngDYcSSEgs@2$4d%bhH(7v;Fd(ml z68-&)ZK-Y+jg>WMF|pfg-Zt`N366@G0dnJo@&xiiu|MAsNuBoOX`eYdm2G01O(p1T zO;QQ+aqY|Gb-tK)L%0uqBsnh4#VHJ^-pvP z|Jhv%XTLv5dHK7yf%HZUlav(^gZi6F+s%UE*tqR6i4m*D!rrnt@}hedp^@H)YN+>e zVDM+Z38b4#Be<-4K@|1-JZJ19VWO}SQJW_HCl!`*A7Y3`q4UuFY_ z1{QXIi!#M2E5Ko%Xwprr3f%B{7UYjUUTN>v=U5S?X(~q~xDJ9?%{|W+{CDxg;m^o^ zidWKLKqny(o%zA3sN536 zO>@@e6?I^<{>5vKiy^7=2)DGzmFB(ttnhb;TG_o|voqQQw+gH+9aS)Kp9Zz~{pBt& zBlHvn1;(&x#VnJ?*}8t^^wE)UyUecJZFy&XdG}8T>hnC+4-g%)(Y?!OfKgj&G8Aq; zozBmFgw`?PGXP9Y!g_1Ax=T89R^oN{m#hxhcdfE!ZZDgS(nn}@R0R``d$_Kdq7wMU z!N>-Pd}n9K$=qHIG<>kw8|r&br*1QtyKfN@F22D=d51vY#}(+N*mrsvy`QG4%wCUZ z8u=@YrSVHalWcAWT7R{M(|MJ?h-AImmhGv?v!yS)s%)R0c*~&^--4HGfsm!3kfq@k zaUUFro2D_*_mUG))wyx~m|kYKStA7<_GUc-$&(=c6~NEKj4Hyw20_I=yrxTLveHr! zVF}fnE8BA3*mSKV)Xn~k+*T^tK0o5e-YV&SE0}Mc$6D7nhDz&PA8WqB*!1j`t69J& z>lI7={oXpH#xC0Z*Kwr&iE59vKZ<~hlje?oWOsV$5A`D<6a|!BmcM_IbcnpXuf5zCP)5>DpQ-baukg8X%(WM+6k!#u|Vcn3)d3*uIQ4Bp;_vMInyW7 zsvuFVxz*G&`x*+|`6$8IoBQvlOy>Uh{wc)s9>16S&GrqwhioABb>nDNz0)fIe;)TKbgC97))Y!+6C<`_CA#IxL2Z z9|-Q(+C`bW_X-@QJGFy3I@*~w;-gdvgpV~l9KU@jfPzZ*;1`MQgS0qY*E#>mEe?OK zONhzP=)emy$zGX2hT>qe15qXFF1L3BHWs$z5)O~d+H|oZ%VUTBMec3}a}Z3gV3i`= zP;nxlz58OpjL{}_b1=0c zZRK@NTIu>4sbU9>JA+ zuZ+{_@(0P}KkYsWGc^)2c3){K`KsT4y% z&w&;$(V~VM-~MIZliaK2aKn-{jmQdoy#!9% z{ux#^S5$8$2f8T>-1wGg5Qq08fq$paMjsDF96B!J#%BkOY&WCkI<$B&MrdxD_m=Uk zJqq1pz0S`;hVb7KL2c!F@K+YNU038F_~mon$9m;8x6KM+1$%`OHDJRd%A#|$Ek&{S z;;$W-!>3nFA1I>AKKM~D`_U+8AH=LOT7jB-ktyR2qB>pWC=P(^sIUoS9{crzRbBGN z4kuBn{9}AimcrHk=VzKx0`Jgv=;PuKS}>vjAHIadBb~|4mK&1B#ve!7PBhlY@9r)= zygY$@I4&Q7#C%2ubZyX0@Sx$QYMkvtfd>JP(8}?bj>{DF?=X@Ei8mj&H-Y@@!+g$% z6Y878@O<}cDwqKP6@n%Uc>w}w(KD7)G=+%M$|xf{eesG(k@|=?!GnfZ_nUr><6AxO zwj$dN2D}}o-jN%m{8mvL_=KjYkHvO5!>Owj6{E33Y9NL?m5*|{a^_gwNyMRRWN^EH z9*jZwHZ$V+HweM~_E=wqZa#-(AjC-@!Hoau0`6L}jGANAgt(~d00Aq`xVI`;nrRdY zMxFF7wyS`;xW{&*nNjzO1Tu8zfj#dxP`cy+l`xIWUFFf_o#C%R)MW;i!~Peh z_$bEcRmWcstvgK<-TO;Piovrv19*(iI|(TsFmmre@_@%J{k+vrCCXDp7XkM%-bK?qL8d z0?egcI`Bhyiv9`b2JO4UwB`DTNwn@Zn{bv4UXRre<;Knjq`j}z7c#i66dU)-oeV}?Zt`VnBBEeaU3bwYKrZhG>Dd}-o)B&qePs6n(6CaqSeaYP}e z8PYEpo%cGw5APVY)%NkaH9>j0YD_JL(}di>KTx;jRMnN>UEzgiQIJULr?0{l>(qw2 zgO1sj?hM_!@9?ci(M&ex&!!qJmqZ|KUAQS1gy`6}sqAU`M3AyI5t3UJSSy2Xgmkv) zKuVo)6IZ?UM#Bz<@(`bLrvn5PV3#AHF!)muAGYJ`4e#!&DHWm z?65Q~2v*ja7)+nliYpk)p?q%K0}^~7Yk?wAGX}Wbt=x%Y00i7mAUm8^+Ygiv)~+;r z1m7l%pcT%0e}?<{Nmr|<{YpQUwU7OTQMz}>nE|J~r79IMW7@na9+-zy6kqk1cG6|| zA7h!}pRaEd5njq3Z?7Bw__G`Tg8HoPSB+{F1QtO*;og0wh|m`LSLRkAL;;0n~zFf zQ;dPI-%yhhRj7?a%=tIgJw{}BpGybdqlIfxFsV3zF#=O{El$@|I^VIaRQS z(;x#W!hF!(1VPOcRU0v;dxglF3+$xp5jRcs4uw(5b z5CYdt=!sIBM?!`od{vUdFhvkIf^k%$e94?)$!CPyZj#}?JTABgjQVzag-^hWMq0P? zBB{O+REZfn?a(UQhBa*ZNpVLy1Ve#3wEY^SS5my8`X%7Af3D`^^q^N{=BYO%CbR7^ z&ySZ4ooLSibT0({MN6`mdb0Q6eSwzvewO2M$|Zd+BHxN>8{zU8$=p^DUgNW^{O5f! zPoy%IbzHtiEJN6*{)?Z@yp6{kgO4}NrWs>a(C~cQ&Ch{J2L8HCp}Z!!Bhc!|0d+%r zO+63{;FH60oaJ^|NPfKpbq?E4ys4t7R~7q|_kirzR=y11CN z%Xs=#sGE}iP=!kg-v^+kg90@wXPo25gPZGl@6U(&6Gpvc7#Ufb?!cT^Zzsa9bGi< z^H=|rP{FSX${`)3gL4uiw7?xWxQ=wW_wqm%tnL+{&XJd`HUK1ia1gx?w9BgPcx3yk z1qNYcVU@`ncOO?ICrIjTVM>YBTZ4a&(2KZL0j2O3CrWJZDMnGX`>>3Ahu_2Y{7NY# zo@m*2_x9$t;YI-#qp>_(R_0qo$-YM>`_ZKc@B^pyVNS!q44wIha@WWTDS3tu%uR6t zK-#rt>yo4rYc=e$0XO>U^dNGwBop8K3Nml5ZejS#Y%8T>PF=VDGQPD{Qw&Cyr#hfSrbZpWZ8P6L(Cw4D%p9b>1$^9Rhnvy7s`P)2_-e`b|2LBPjCJ>1&# zz7}60L{$AOE9-)@5}ZYXHvOaT%8xg>YL_mV!4MRw-Sv!5nYFj?R0#b(;Yk`a`xscU z3V6+_%T_kn-Ex|`TW+RJ5Pk&)kuQ0>A{rRYBktGju4M%2&&byQtjjS|G(pTP^oAsj zjz8@ydslYg4PU~Zvvhm`=FGOh3c4tJPOOH2eLddvNzt)(B-5+C$I~&mL;8hwd6=&A zk%OMUJ!4djCSP-0)?YA)W3+CNHR8gz2*QNbl`pV}FVkuDEz7$d$q~puF5hqK??D1f zA*alqko96{rc@>f*X@Mu_^OYrOd6K?c*8X^IsFj_2WEuQIr4+(5WLj&y6%^Vr1t6s5aK@=VQl*H=aG392=hos$$kTPR`tT5RXJ z*o+cUe19UFQ>R6dNO2^?Fk_z8cM`ZTlDOzK9amjZDM zdOm)G=bW+WvLvUOonDc@XA@yD&9=-=e$IBB)7i1U$glN$&K2ago5+Y%pxjF#u=6LY zM<4AzFO+J^3nN^7jh=6A>E?jo5rw)&3V)D8o3R!`B`GLoJt=c>HxZO~d4WAr9}dBK=kI z<5->9LS!&AU@q8>;^RB0p;GKuA|d=K7q0o&Q!cs3HYbN&;%@i$M#A}--YK*EJB>EK zF51qfR`p_3a_PrzX>~O+W4>|5V_e-( z@;?rRyCDP~BD13-g51U=g8-V|)b~ZP?9TH~CvqTOu9lO6A6^}WslkFbLcJ+QuVK{+ zi7#uIIY}*~RHwhzfWD>k8>`rwwT>vLBv2gU==lskSPc*cQV9b6&OBY?{zmbvIsL-V zpa|ZYb0bS7J}ZnXeB6ZKw~!~_E^RdylcgQv3f@H}1(?;n7i2U_jl z$F0==Annx4f2ryGje`y3KoP=$9E^U0zfPnXX~GidV~LM6u&FcuLyY|gj7R1d4gG)Q z??FiaiQ)a-CWHJzH$ocdEkyr~w*3ptdyDd$4C@sJ@P8qPHE7Mfz1=R>Q#Xg&zemaeu^5y(i+)++2UPv zJ#t>FP?k~gD}_?oh!7?7*$M_q$=6Xy467)T@ifVj3#YTCv5xx_%&y6AzJLG0sLMYX zOC@KUnHfP?BnSF6BuBEqjS)BY$3AB$F8z=F5|nSZt4KjcE%xyYnnQE#xE9^9KkOw| zJ>4jw$5-B?F=Eb`tKfkq9z!9|=R;FAIt%o~nb`k_xg2nZw>N1gFSPlNhD%LGGpFc} z-)y^{)AsZLfij38K)*C@2g!^!w|A-vJBR**gM$iLdVY6GY-rmx9&@AU z+co>;Hf5adB8Z4`UZdqI!t5cour*VsrRWiSEL!J5DvMak5myb|`)xz5_{135* zbCrcdQ{FVnEqQEkL{HKaEk`pK+8ijI+$`9P_$eN|L*XH*`92Hd1?Z_Wc_Qj(ipjF07 zd#+4_Q6`1+$JW-e{nqa<$*c|?G%lvsdDOYNv#Ryl(7mGfm%AToez82|Sp_HXSml)I zZQZXu3)eWFqjEd0RoO|LI;hIUHh{ZUgZl57W4HHW4E4u*Q1;MNuiO(EbrTMFG(PTx zo!f8R4X!y=f2op(+1!RKh+`5?tTz#JQ1YRQr zMaVnV?>3!c#GzP$1($kvGNYr(%In5eCLrh(*GO>$P1Xyby>Sl4@ET6D+22*4Qpb~- zbume}jDSc2sg2gOGZsoQa;v0x%bd1f1qREf^!OpRj<%!)y_PeFM@tTZr~4-?CeyAo zm0k?E!Edzoc|elQ9U(V5C- zvO}06Xng!0S%e@fa8avNf9#gt$4^59D7RaV=39D}Eudb+ynjp@cde7IQf+;Ws9rHn zNcSxa_lKYGL$X7I_3C@?#ib1BxvFtmSIwE3UEl*ph6``QF^f~(cSzeng4H=6oTz2KzBS{Q72?8p@ZC#{K4KOB_6aQ-m;^T?;s1Woq%oR z8EKT7vc7c8mb_B0RbkH+M5-(x(s9EeoCPOuB^*O3zGCK_n&eoZ2n^$4lAKp!hi?zH z9rVpaz zIBIeG45M7}tIpQOFJ0Z6S+t+!Gtw0?((e5+Ch`x=&0$7Qi!uu8cuztlztt11sS)M#DsqDnL+6z-@ zxjiu$a(|ph=|S&f8(|IpVetzzy~xJd$3dOvcI~ z62(`vf#HXKE$D9+bb+`aKho=wEvMCBM$6VhfMFz4cIZHbgspOV03uceF z%66^g-+3Yvx;R-nWBp=J&zsQ{c~LM zujO5kEGHOx262QSh^0wMO#Q9=MHUyQIUz{mS4ROoVRB48qZ8X|q`QrZ@j7%NI{3<_ z5aOnP8>>ooUG5|1!_Sc95*pbnOtW0k=LZE*l6yfCUh-m2r6u^?t3;A3<;{VsZ~^`I z4)`N+Fw{Kj@Sy{t`K8jY&YEo3{Udkg*mIrqHgT)`!XJ#vBCU}`-!LxD)2O$nC;zXFO#Y*VdO5K$ zq9G`j2!9>;2kO1y_CEXh{%&P_yDiuSA+iVL|7Aypvq`7LQG$sF<9zD%A6uAx;o@F^ z`}^Wjut%=_Oz6RSwaRNcVdMwpD1PnwOjZ24(s*vx9fFLz$O$)B&A3we_j?3}Q^zZ{ zQ*gy{*qKB&59oA5wcWy$3yrb}edVQHOc#6rhj zMXD_rWxs`wdx6H#X1a`D!x@+ee`W<)bfr=k5(+q5u9(%%K%U#0qpILCgoRIti$p_9 zSUeuu^axvF3{9gtv@sZ$I^hsYR$yO4C%_%NQK}Oc*NL@`8dzbCR@w4=Uw7WFJ;a47 z>saP#U$n?|Z!w7C{}(%U$&lp;0T|Y?O0^cyvwC?jU~OYoT0Ly!^~HGFwhT#idf$3q zvV7O}S$np?z~F*cRc{NlS*l@8dd0Zus^5p^<323vweS4@=z7brIF`0+8;1lB?(Xgo z9D)RQcS3M?cMBGR26y-1?(XjH4g(C%JK6WP=lJ&btLMiYE!EZ4)#tj_s^NZT?MPuT z=iZ1$<<9+@=;OMx#o?wEE75oGCMcZ|ZO+i*M%bvJF^JM+PW7>KJrHCS+6fseo<3FY zJFjuD8*vG}MziKUU2oz_USm|8*7brU`e7P~Vy%mlpLeW17=AvIF}$)i-{Aw0~qs3NZ~z@CFrLd^npsP@Xjl z+`+cjFNWclp%CV!D9J$##uIOsvY_`<>|dI&TF(|(1CmBMBu59tnvL+xHt9W;2k70B?Xs6a4QPOlM7SjEMC16+uTh`CRj1XXH?#vpmk`h)lt?0Z4JKn1F@ zK_iL@L^2_B?`S`QWf(oE%SJ!0$sBj-KygV=B*8x&!2e6}==>1IZ2|^JAxTz0iMK3C z{zyT>(nU=$Jqt|q_xlT_R&HeOtjpI;vk_fk%x}7DwnmiIqK7|jC*W( zMQVwBDB}%!z`Td#R0v^7T7N*!U376;jpb_$S9D|YSMuW{SR$zYJf#|Pm9|;z2U6(5uxd22frMk zg;sECI1v~X9&if%kuROcTEZiT*yMiFhRrOrhx%peW%{uCP{Fm1Rj06P~22dOC?}eD(78!~_)#&-M#3H4ahP}uDU+bpTgH6xd z6ji0DUQzeJQZ{r;D4jr~pje^CL~UVR7NQ3y;*6BF^kcTR74Y0?JLQ2QdOl8Kg-NZ$ zG0w~D3cZhIDP~dKgd25WeMpt@TY+Qxfa-1=rG6i zjriW%Rc`qRD-7*L*up@uQ;9v8w#m^6;aHlbp}UcELiT*iysI}qGtD!dD~>B1d-0Kr zGK9mlfUd^3tL$O}^GDozsWRE&+;I-NM`sS%;t4E9|G)V+c4%Q{D4l#eqOi!)CoT-5 zgXpB0qD^&E-PR$ciIpsl3#ffwKijQ(s@6s!B>8O|2Owvi3yy*c)oBYFJ zte2vStjK41yDAYnpgrL5Md6ZgR`SNqB=bON^d)|2qs?9^+Ts)fszF*xiZugWwN#a} z3%Ae`>P@4svER}g}oj%V{+;Oj6X0_2!>PgU&qVS^@X01{ggkvLl|5!5;=vsBjzgtFx{Df3|0 zVldAmbBWbtR;Jo>ZG)_(;0s^r=}UC1uS~G;DKuXwb1*~*8k9iaON=?O8;o%sVm#u2 z0kvqAEYwx4zt~yb!{>iZ&Fv08gz=$QSW!l%=b)~+;z7S-N`P|+$v;U8z_NP~$_t$e=PH1=!=wby-ZwDsjBMYbQ#uhTtj z^MXQw8fCD`=0$6t0*}mY#aZrU*M7r|f-83}Za{u!^Xm{LHV(R5= z5!{5oq%}A+eD1yiVYZvmC!E~L)H&uIOhJ?WwyP))I_?X^+77@v^WMCHJsct29>D^G zesLNLcD|~APnri@B$itJQLAs*OYKv7rVY}#akY)-UU=Ap8CF@4KL-6IR)K6rG1<2N z5zV-z;rMHwy)wmT^>@Ng+j`wM`PKSza2~nD{M)!flhO6IM#o@Kk!_DvJlK6iy$F8Y zQLH@1TKDF*K$B;_Qo9c01PrLJ!HQ6&Y@LHFfYi=I!AJB*_p2X$qa~Eg_z^Ej!MIc83kx8 zbj)-l--oljzod06JKzC8a7j|Uh3~;*L9kgm zJ-pqxt)v*JXAw_>9{iuAGEoO!>KlGp`79GkBZTY(@l9T&gQHFfrf;M#?B1n+Chn*} z0A)6nv+$hD;FfXx30uDK`RC=~JIvP^>3Ss5ZpVY0f@j4)Dwe$Dfrc2sqxsUEa)HKm z45xk>CrWvcl3DsguMtA$!_gX&f{>A-h!Ow95cvnL4r_m&lgrjHa-ZSThK_q#tYR6> zBkuSoJE@8m#@OYBbOEyVf$3_!<3!-lv)ftw-b`=8DFU)&Fb;Fc18yN&Bi zG@1Uwi2nkdaH1ff$ZY2p_8%a!hVC~^wH&F@`v>Ixy=#;Z&|VEUFH!z4^!lSQzNmxZ z%)Trnnza3MDSvfDNn!-hUb{QDNdDt&1}lEk*$RdckJ$fj3>hf5f*mf%zen^x&gQ!= zh_-&$)lR(qJJI4FeCp5qo8dx+hW^*twEU*6E%o##V*eTf|GG*;kZl|V1_}AU&c@|8 z!*#LNoR9dArt1bVYhncafATf{bvCC`zZvdwO&QjIG+hwM_pgCMeZv@eAlPciNHU#e z&yT~oZ8H>0Cb8)BbI;%_t!-`rVxC^$M;O_BT8w<%f%%`%Xd|)VaOD5c+pjrF)zqT%oz{oR18U8TNx* z>)T!~{k*pO692TvQn1jtB*xM5uQ-)b#XhzrETWj$X%p$sNO7S+cmmT)vBgqSA7lzrzkC8shHUoHzgk{B=Lh)y zRCL%MHP>@+q*(hD`*S1}2WNep6@yx#lNz}JkW#Hjr|+XW37{PC@K8C>ZM0Kd_j!3K z8aLcdg<>ty7dGd2xoM==t$qG#xLsAJod9gm^JGHe$sZ?8F*Wu`AaM@AkNQ_fB4&WH zvFIXk=qgHZ+&u=tDu><6DQ%?%OAUz>c1yGPlQ=55Ay66c-lo4OrEV_eZN5$5^7WOc ze)5H#p!H04J}4Y)vK7G=1b62vm)xW?7Ex&!TG769i9agJVbfz4Uu`|!o=|oK1womE z99}BLYD1C91dG0szbUIWrwzx!O;c&%=LK5>+s}#@!B@tV(oNSUN(or+QtHj)CCN1|wW_ovcN?7uK?9@w)}la-@C# zD3H1^5SiRf*G+@n$B=DBg3Qk;a(BT?tV}h|yiuxFX|);+Yzq35i!TqFIa?+q(;X;+Z++?H?ZB zg*u_UiGXO>&4q`fDFPF<#pRxRB^a`=3X%&)FQM%R6m5^raU)5U)E9x%3c4-uSTtYG zS2#-NQ=bMib_xZ_DnXfu7CuS0J|(oq?bwwWJXFx5+uIiSXC!D`gZi*zFAN}EDrmA= zYSm3-(xh?IJmD%za_5nFZ32+vnc%O0q@aC@tLwfIamC>7Z|+xa!cNk8waidXr^|8` zMK!FJk2<-Z3>Ymy?wtTl=Zm#IY7B)B%;j6n?J6$wJk<(=yp9(iO^dOx-YjV6kuSB zNu#kgolCijlzyq`wgaR^5ZrOk5H+zs2!XE=1n!S%HgHw#T*F~oZ@ zaBHO3B6#%aPaLf`vz(znkcHi+2(5YRr#X2_yv?f%J~M}ef(JfHLZ3A#b|vjN(-;Wv^UCRDsFPS z-GeEgE4EL0`LOXs|(&ZPUOn(F@4|Qdpo<(1?50?Rz<#` zbWA9^{=Dbs^Y3<-8Vth}OsrRZ`kc*Y9utN7YQZP<=iA>E3EbA2CggEq3iY7e0>uws zDHxSWa*sAXcC3V&2b$>wWwbOzDbSHwDmvDc`b;i79y51BVSK=wgT@Uv-fLG~NPj%5 z)(G8c?ps|K4SXu66-(|)-LZ}fg7qW{js4Ln2K6k>{JZP^CC@4x;!wY4t_qva2c(Z0r{Z?~aFL0zmk7A?^zG1B zW1kac1@{61peT^vPk~I$QA(jxU){bM1q4hnMfRuvhQ%MX z;PSh1p1=n-5y5xhBL$fu{9Hj_QvuG_(>bvo=6QqEx0qFAi)NNkA1znQ{BgS4@|93s zgcNyv&jWt4`n4NFlbtgwaX*14-<@xW(we_y74blOGbnY~6(H9|@ho5FRFR#4obw;3 z*->UOG6+G8oaF`o$>Qjcx*|$Zdu3rVDa)zOd}ErrRuZ=BkXF2jM7!=8`)Z)*V&KBt zCQ@w?84@m@o?0@Iv@2{%I8= zH?w8cwNxYu)nu2z+V>U2>t#E-oU_nL8GDnCX#!qhm&(pOFCaxZ&chyXpLf~^Pzu5A zWekc$d5C`ld~r;-noUBp5^q%2)ioB{emXLe$ z9f<&sZ9Qzz{q(>4Y=-N!E1&!&@3y+b!6=z8;d6pcn~T`8${87~{KXF}N}rL7BbmP^ zuP7mYC(gW};OtQ5WmA((TATZ7;I|aSKJY?t_eHyY(FD* zL?+Cz0P8oyUOXi&eesc^9Iz#q9tFep!^~o|LRek-vv!3{Z1BKEESFRp0OB5GZmn8< z@%0uuH5rd{9wW;@dGb|FB$lZqLZ`di=X$9qHlj*v!M*PIsL~R4m;x|z>`J)C4AFPR zWlV3_!H{I7eA_0Nxd1ZN2dlM;V8z8OGoC2$diqNXzr~_E=i>!dYnvz$6X)Yz0adrO zZ?xziu!*deto6(vM)ip^aTI&ekqSE}1DjWlu*Z$B4zUJp+i=B7)GA3uFYvy^Q}T+O zwI({YwBimjs01MhoFWnD-Q>MH|qS|A}mKij6uCIB5(mGEi z(yJ#JR&*!cxzP(5Bv(UFpefaA5L*ad55&W|=`MckfrGQBEB{*fdIj*RHlI3I>*oJu zm137UbsL+$Rnxqu(RdJdhBe=4A|Fys9CUVIa;*Z*oQhK@K$S@Iw z$65F>#GJa3Dks+LQLE8w?S)xK3&>Kr!E4zWz0Y$z4wSpstI*NZZ1RcqVxu$QLsmVI zAoNrak9m>-#qb#aC|m1Lc)w3W8w^PN&hfM#`Z4P1?~m&oa=X$3ToV{cr2Kv!c|xW@ z1yZydK_;AAklF3v1K9*KLZsK1#)Ek9G(6gJ)f&g5nkb%zJE98sJusU+g)m~h_vJjB zJIJegBnuV!gkx2?cz3vxj$0wpob@DXICtA%CVYG1X@Nq=o%*Sjj+qPf8xH=^dehY+ z;KSdrmIz8ZiEDUTp{gcmm3ilavFIc^g}^G|BEfRKTcKg%~?A$22>54xq2<1tkisr;b|aCKxBDO>BFxom&R?r-(g&f zyDIXo6}Gj)d<9<_vdo}&ndVZ!j&_RMqsi0|By=Uonbt5cX^wJZ|I@?`F z*TLs<$VY$e*k2Dztf1_c+83;am;%8}e!S#n1Avs|tY?>T?WE;~`$7{IlMd^W2If6P zKR1^mAeh)Ueo9l{Dz{yZEcCP2PRt(q`Ge7nG5Q)!c82CSdv<;95!v}k$neZLGSExp zNqDn(3*9X{2+d8TiyA5yz93<9U8}=ohM_PEk$6G(!8r3wVsRdQ-NKt9PP6=B8Ox z3#wP)1UaQtQBX9-Ustl2au-8URqIr|qqM=Ae0HGG9h=&q$d*wWGkE*< z-`R#~K(WAFlAhh=bUs@MZa3&CoNsYPW6?F9Y>Q#$mKnfbAdG6)D-&)$MkrQ1G?#5X z*?LXz;F9RHv z8I!5@l8;}Kcws!fZnvB1BIw0y?DHs=SChSZ_zQa|gVVgA^`#${a#Bw^9e9ooWm+2` zWVsnUM zz5c^<9*&^=d_m)HiVyGiWb~^Z0-5gmM6E_6j+DQ`o%)ya#$+(P}rE zCaQY?+gFw((iULI!n`Y%j8)0_Vm8AA1;FG&NX+5Tl^_;Lx^IteeFSMIw=IXhBIq+Vow@He&_K+Ny=Q4h|FTPkP z-S{z))JD$6{&Gsam1y}-JNG{RDZL0P;A`xZ^M_H42eb`YNz);s%XyNjg zXcUsOSL-iU`>EUP-Z57C|6;DhB#X4mj}~KQ!l#2GR#+8@`r)Gn={7f=WWj{xTKW8v z2v5(PqgrYcWst=?3;T2lVS50-gX{Kg;3i-Rm*pv*HdKQFJRmDduSR-@Q3%Ix``x%* zS6x6OBAi>OpKWpyLXQw&UUsyoT-SD$_GFDc83$rdn5C*>m6Hfag^uJ#Ayf?p*_+0q+a)bl6x((uPscPc52R z`%oDGEDQ&n=`ena??DGN0J$qAbW!lYgjcs|IBB0;&PV{xKdCu`GF!D0ezWv^j?z@`-rVuct_O_jV=*XJo-l)H|8x%?G zYCC^Wdds1iUB%|AYo!my(ZdIe67fb;wY>2dA5_7_PwE!evsm!9&}55wO9l1UYamvhQU15%{P-Mu<$CvaG<*R^ zM++mDy`I1LW9J#A9Viq};RlKzy#E`@10F;^hE%#XX@1`~80?kOg{q2hEgSKUIf8Xs zPCH-mtfmTnUR*LTaVJz2WbmrfR`;~P!Cgdl5dpnjD+M7_O^~Kyv`;AdSy%?FwTiU+ zB>)dllWzWE(_`RIYz@b^0IC#n#1*Hjjwez>z9jP*h6b8PzKW34IF1`4bn;osP}q$R z^-CeHdANMQYs0sSY-ytH(1U2lbBbn3B-L(H!e9tBAorMVEN&hap+nn549#kdd2tZ2 z*lspBNw%AD0vKF87M>2$#?-`ev42St;~R_^)(eI-y$H#1nRKbJL9HKnKfmh= zzSFaRX`ORReT!mi~TQ-W4#4L69AY#v_e3F*~9*#8j8OnkT4JJgFsA4iH%je+yRQaslgdU~_ zNc8^rs@lJJj=ULlD7amQ8PIqNQ}>xV?E&PK3dCfsa#@3>2hfR%z0>|%P|8qtNPJ*b zrRAdm}BjK4$0)mDi&FK_B)xeAf$NBvy!-b|FnT(rT~JHd8h4X zk&P0hQ*9gNnK}FOQP#1x#-k3ZC7pZfU^X0+uD({Cj2Q`Hn^&U_sePL2-__uQyg?VJ@&W_|cRGc9u!TMU{?~{C!tQgl0So z%->`FrC^=YgHE6QSNTv2?_ zE=A~VpdI}iYIidEM{1Z5>nwdkr?Cq7yncQ-yfxfw{HK^>$NO&8z7%0quCW#K??XRrWq#Nxjs8SdW#QEty)`}|AA`F)7*vcCl)S-ix#{}PDo zehWkrp#%RV5T*VWh*CM}OaDtC+Jft~LOGl*Z;PkX=hRt0@+niCf%&c3IHv!rNFKU! zCC6DA=i$;0v{XmMk~%2%3x~Tla|ufZJv1tB_49PDJeK?B)Kea)mfL=i#kOKTR_`Z<=B|!-ceDK&drgIhv`4+dsl%Q2a7>7#VD_7_=jRHXm zz-Le}+TLbYPmS$`&#y}BP&FkDV5LrdzgNsc=Eqx$&wC{aUwW2hK?UkdZYHS(UaiYz zT4N<3>drv1h^dd=MUEhN;Wfvvl+y)VrT8OLbfag3kcR)sC1KTKcNnhAF&~uZWXMEt z-j3|KP2j_v@v@I3funp2z#4Rwz`ivK=!yILMzcl=$qA}6DrEWP>BwKju^+SZIPS;u zxdS9C$o0kE{d_yu_1pt#;qZi&YPmH_89dI>IxQ};A3l7@lW78l3ioMgHhdLj*5%{O zvLpOoP$FT>>Glr^k9tj=_sL_dpC?}T#1qoe=s}sib{EkPPgllNU&cotek3PT@6pA_ zf#TkzDqq)rN4KyXxVOkTZ2%iinl-OJUY&Dk#tMrky5E@Uo9^zp|4Xg9Bu7z%ACPnm zSMo<~2_?iMd3qD{yZ|?ut6O*XQ4x+6CUzL-IY1zVBNrk@&*+|k_n9E{Z1r&VTxL7h zJfB+|`Q?#l+C0E^ZWFU|?gPlsSHcwe@B@W3EX{Gec#ua~h-Jf8{j5~EaJ&t;TB0o` z>`fH$(L9sjm3I};m+mW$>038y60+}bH^L+tp*BV^RroR268d|$Y>N|KYGt-b{((n>gS)k9UxQdn;WvNpyoUYT75(;$ z<55ph2_($ucwedVck^W$Os+N?_80P*JYwe?zQoYb&~nl(o4LUbd)ITHmS#-lWE9>t4MEDjb)TBo*o;x)WHBNxROrRow(A_PrEAYvE_Nv1Zpp6Wf8w5Z4C9GQ2Ub z(h~xL;YU;_P?e?p-38BgGsk-<*C<1SMXeLhg9>~Bx7oc=URZ7k)E91}8wHH%+C&Lj ztB-NMYV&k@1|P`TZ*%(*UA2rKgIYn$^ORknM^$^!%HZ+>>wLv)&ebN?E^cly@8q>g^6@yZ=G#{P+*l)lVp z+1c!5x%;u*dxVk4=O$=`ai;q0r1K-IQ?G$}xtd6%3G-^TwQKhyt;H#nue!_GBAO@u zJu6TD5SCTHAeZ=o1TO~b!C*DnfC(U^JiMAiqkOumH_KLY23x(I=%IYm^~=&WM#UmG zp!bciO0Ir#W2?Jc!r^98kqdaf(Lq$!o{k3*;zwcthy8n_KndkA-B#xc1aEQ%BtBtl z|8i(EMtb4(x}O~^COJ{Fc8WBa5LhE{+|QIxXBlFV^g;oth-!i%w!_koiaHX4iI*)M z?mLO6N?!>A%&C0OgTs-n1iA_N+%~Vb0v)&GtEX$1fHp8qk6ZE?%uQ4=2O~HpR`-om z6FU;}!^?AcI`wHigO><1yP8ZBpyj-92m754|`emQj z)d{UynIXG+O-u$+X^Gu%SUi&Pd7NZ|NmddxZ8x_`+}Iq{g|0GHyyv9^>sM$1kwW{M zuRYP>nSkMl@P)#0xA8s`?m~oZ*1C#Dm$T+cq{olnI_>sXk7zf|@~~~DURQUZsCw9a zAZ3)%F&>sDK16S}i=BuFfL6Ttffj%7dM}j8e*sNJZt$F@x&`ZM)I6wg0IGdU92R&lze>=(Lth$}*4E&mCEQkz~NnZJNPhwkVVgB82saTCqt<2T&=ip@T_{Xh8V$ z5m#yTCifVH(+8}=#)Bl4$tBH`{o=rWDKH5gcfKw&hl}SXQt+sb2^LkNK+iuOjCcDN z6FWD&kt{D_Ynry5ED|_hR%Ki9)C-JLuXTC%$G;cIW-3OMmZt!Hj*204p!hR#`sFTN zgPrXA{U;`fc6NCkPMLw8osUh1XOzxe7EH!ekq(-hoc-Jb7SIuR{U6-!o0LI8gS{!( zt{L|mkcqH9pe4%$=o#4t%Kh-cR)eQqsTmIvoTEz z8yN?+jZY9-SFUeIqD{J19GBjow`&80j%3sqt^vmu#YFxlctV(KYvzP*mEgA zhb#N`JABkZI_K4~@G|*mqpZSq1?P|EEx*Yxxb6lA@UNfNc35q3owzmK?n9?CGXlka z*Q1@IqW=;a=xxwva<$Du4OQ$f>4L8h`)U8FE!2NG84reN}Z4nxrGbnqfvPJnpWp`05p=WLwho*^3MVrP7=%h#FuRw zK53nst3TnR!M`GYBE~a4y?0QGg3JstPTQcHMhy&cM)E?T-B2ivld}L1B}UAq4yp?C z8Cd=rrd7gWGuqg)o$(-OzaK7-_sYNNT5kztJO_3bR_FdT6#pKPO~i=bjbJjh^pJ?h zBkQr_ntd{0C7#H-#}AaL??^fx3}s&~^7cd~AMO3@t0kCXs52~@Y=v3GX*(JeYhlkk z{vyE*j%wS9w){qI=;xbpiv4AFN<&1LG8*(MMDSsTc#G{BaEs5%9X=Fh_R2sB6Ozpw zqpu?SI+tO8;El}-XWe$emMjtjD>0fp1zHc1h?1Gld0Qo%Zz$7qs3BWLff8*KV& z3Z87&{`%rv8PV|j-I>54V=a{@Jcz;uos4j-q&B+?=;(urH(9#jxs&rT3dtRAgK4bDOVNa-(?ifhg@$a zSw)h?)5*8jV$qSvKhV+#)AVW$9Kf^TrSY0QXRCd~V^l&W_=?EjubY`VOY;DZk@Y=3o}&k-r&7FK{`WreS5dn<@!iZ zkB~JAde1H}9J^wwW81oK&MZx#Fhe1Li+29r_}37BpBEukEpUwwhyP)EX%Y5H$LTNb ziJuqx*cJb7>jxPB`@NpFZX-d5@ez^T^xTT=`)-(b{k-?to`A1IE-*6%9|tTt(HIdY zdqTAg{O-d-49NM`kP=*haQ&K`*MRK>? zO)^5CyMjUEg}Kl$U=Ym`0SfZID|ak9huf`R6K@b!Dn|cEs~2a-WGKlV<7P$teitI~ z*yCUT&X||m)ot8k+m&bwpAIbOU8FM<0&{3`Dv>%J%{er*Z`XYOvGL`%Uy0BFY1fRB zjcUpqYr|_trh)RepE1_Us0o!!RO5?R=E|Ul@b?Y#lv$G&@No6NIbWF3^xFCm0QMmp z%=>x0QV?Coq8WI)ty731NzR{)0p4;{%LI)p=H2Sh2m z_Pf^4Z6u>ct}wOXy0b)rlDmjozPMsg5P|N!F@i-8Vi8dV*B(RIPKui3Z4w094dxWm z9h$NCaAGq95`;FeOXRg13KAwTml3zSFV==AHJ?q6*z_f8fh!!?iBw*>C#CZfE$sJC z_$;ZEto&;DuenK%=M-cz<}9KEqw~Jcxe5JJq@xJgx6$43`pX|5k@lA(gAIIH)V7t< z9tXPm*wEWo8+Y!8_j14yL=Ra8K0vZLumh)5lm=ne+aB-iQsyW?qv+_-s=DbA+^uOe z846sIKMt=-r5-;O3Yaex1XOKaMyYxke8-Q*=s?r7Iub^o@BrfsP*QH2$UIu+1p~(Z zLsqFuxElc+m;nMw?cjY6@NjADooX_h22__p%^oQuq9TA^muNtHL@F9jJsaIu>6W*P z*BonJbRl8@UXuk_h0>i3t9UL+D%fe(%at)?xbj$ZC+FBP(x9n558j=0}XUbdOA2T&B7OuP` zpJR!SKHEgit^Mz@+x+|q44Q11^rm*5|K+olgysppG_jB$9Yn-c^#Cm0>-`ZN+vgll zW3}-9NMc{+4FQp6-^@)Dg*Rbue6qm(DYirb#K;f0(m4kg8;%ONMd6GOiAgSpAR>k8 zr3e{)HtVR$Q~GvdhxlHDZc9{uo%Q)?6NU_wKV)X=6NAh$3UP&Q5Ww8iWL9E8#Z~dseLu>^p0=z;B%#2)ov`=go&H7{{LyV2hX;I~a zK2J@;x9xq|{hVCA3ksPQ@f#K$)=^Z4K>na}QLuhe99g|#PptXS^<;l6zoF?6Gde1CAa%gM?b%Mhus9%;o&<4%slnlQQ+eP(`Rzu@{(j3sHmLlYuY=9QT z{0*h7S{Y6T+ny^dM?Sr^m@VL)k(U7hSX_SvNbegD?tMduHN!=yTLm60>4dD@brvVY&W?WSG zAomVA*XXYDpbfZwWR@>kHIddq190#|Ndy$wab-iBJ+_bo+#Phnr`u@*FKr^kFo7K+ zu1SprB7~%GqEJ7q%2;TtDug8i=+vH&#Lh|EF9L7F*yIC_GUoxSI(3w@&Y2s}p}=ot z>lyR)S}lJLy(NqgUbY5fQ4lc3yP?Qz$~h`)iG8J-^!}Y0=}+mJC%Xq!&4&wCZCIi0 z&!>1l%i80a8AU@58aq9$ItK3iGVW1?Qm1_tV(((xCLT87KM1EBQrEwT7?1K=*#_6Q z@dZ^L=LAcy*1LQvI53X${M3L1Pcf_7H%KPa``RSj8ht2&q4|DLTy%_Md3LY)3A{IyQ+Vuhfx<&DC?Blg`t%SbH0Kx@ zAZ_>Ah5a&~hOzaazdYqTP*t2}u94z$DRN1fBj5!^9p9voG+Ao;{svT+<+BGtSEEXz zI7Si#E|5iGj}w*<8#xIw9-?;R9<+`N=Ip|T6I+-C89D{Nu641e1s*h5>$%?FDR}Uif`yKy@^X7GMqFoJ#Xc!NErt6u<+Zf z%mAl0_v6)0!#WgRJuWmA_4uOwJGN&uD*gRF^cR8~-oeQMX-tL%cYshs#M%YD=Y4v& zh2JA-k)d1=jm6sM2AdFo+jOPn{Ag?_ZELG*=^(}VXWN*$jXwvBAu+#=({2c7#JLL- z7nVSI&&#aCmYpGM)#sE2GnGQk{nf9}&$?3VnP!pRk^Tt!OtJXZ_72L&A3n1+cYXu^ zI_T^4^~M3`!$NWH*`zLJ2`dZ9^es*R?khl8#v=%ynThSIyfTRr{KK5!E29ZoY_ETl zD}iJpce1MrZoNnEQ;uqxM?QS?fnqZ%OuNwM28kGRcgFBt%Hxm zt$h_MXack?v*`fny4Z)+XR;?O-1zw>*ZEWAP$jW?%oe7*@QbCxYPE8XFbm`Mu5{{> z3+ocDp1N;gj*g($)t)Ywx4<4h=VT)YY2_OW$8&8sT^Qpi6wxL-bub-WS0DWK@gyWh z+{s0Mej3xGM9jpbZUeJDsru=u+g+UZJXARWfGbX}E%=NBvQ7UH8@r+43z5%Z&U9Qxd&rOo?|~(0hI8I>4SfhM8Cnr~Q<> z@xJyI+jKQR%smJ^UW`!K5Da)2cMS(Q2Ah&*If?c3@~RRcjjfXXA{68b=0DT`MJK|CsK~mH_?!A<4vwOh=}Ua)5xhcp2-K zUaozXPL-(e1B}8~_l2<{4?!9rO5-M_LZb7{XY<^-nZG)bxZeh~PwD+w2~wVgDw$~3 zDctD{+{%l}SgS?#Us(Xuet2;HP3KtX+*@GtnClYFr%fxVs^I?n3@J_C%l3V4xMn-e zW7pFlwB)=-cwTE`E^*oWl_A5qJ6dHQATPQSXnnu?G7*rJKb~3d!!*}9;FkNbGSxyy zQOfKAyQTXZ=U)>~QdFUzifl=`38X}Vs$6t^CY!|~qqxfN!AJeWi`Eo@O186&*}o7L z8xH)$i8>T%%h%klQHXPRy(8Y7+!?S-q)twbDsa+kWeIp2T&MM)AAx3kF;&iDs>K&O zMFTU77cr0nUrf}3odNcZ2(x7gJFBlsn!e;cqvwB43SNHI4Ht_P&tf%)e@us!Kh0p5(Ql!DmsdgPPEwzHP!!3qyL;u|Gml5 zA~t~Vx0o86m;C?v133gG3Uzp>F3!I|*Pp!W_ht?P91Tt-!lqsv_rL!@fy(SJ^qKE; z_<#5JkIDSshwnrw!h}l}%F=TB#WDTgf1nN*8vBvtV)DPM`s2u*JE8{bmQ2zE(Pw)X zuQR?00RaJ@#BkZoh<)z|e2J5obQ66a_iWLCJUA>CjgbUgTJ5jbct5YU0Tda~Oh^QL zq?t_jq;7Qc4->D4h0{ffJ)f2XVUd3mm%%7Jt`9(5E*o;+8!AD0ou@bu@VjpC1!V4Z z1;Tz9KR8`-tg(NgxUeq2vqA^NKovvN>C*DXe3`S4G> z2=&9YULWfWY!dwuBlO01^@_aiA^SSudpB_Y*n=z(yV77omGLsy@zyY1r;W8#?<_$o zyKT6+J9_lX6csVT{M^GAt?d*X>c9@RKHho>ZFSbg`pE zlkZzQy>^Wb43Hr9e&Uxh6##8vhB9F^?!f+NzFeEt=u!iR%`4|QwBr@8$z!iXCYf9A zbxi-oB%GepdNGZ9VgcIFuh2l??X~h%ixFgGf`qwE@_kZ&(^8|?r|5x+P6Qk<-tW4w z|Ml$CqWeps!R_wvO1?aT{5fmQ7|U71!>uMKm@&GIS0G9h2U@yYO)ZWtFRiszOR3VQ zO>#_{^Zn^mX)3$z6g_Kc{{(xYuqf={(ys$mM@Ow;R`HfNUtB|R(8`#nd%qArmWE_p0 zTF|);SXi$KMIVK;5w`ch`DxlkRe0bLv7n{F(*!$l- z%EkEEE4;mrt<%L`n!u-4AduE*+7`5Y!0HJSQ58Xdkd+i|I|LTp&ON z33DSqySt~p-w0q5cw)7$WR)9F=RNpFQ<;y!f|^C01O@DT{e$WM9&j+iI)VB|pWhdk z7xde<0!m;eG#Lh>@#XWndp$P4`&_r^dvBf_ert7I5HMe>uX_#SLJK&==XX+3tJJs{ zy_dY%$?TvwNqD!^P$+paT`G?(>^OU~L}}sSDhmqfqS0-3nRSIXxl$iVE53D99vFe` z$owsWeU6X9p)yaTe;i*EEoJ}u49Br%LuQr+BCb@RGW-Nb({ZZxD=N_tY%^ zFXW0$8T?$<0c_fD3L_$xAB{$t)v>PI?Ot=9xYJlHQ>@k+bwL`)&M(J-M(izQZY99k zY<{3|q`{=i#VXZP#MP#;!Ly@r?b_L_EAzlso92rB%Q8?n!mBH}Ni;ANG5!_uX?P!T zMW@Z|E%}NpUpKT~sYT9O9SB-baC68WIK=*Z$%;R6oXRGI;4PqP2_#J7aaD zKcdF)rN6AWW^*Ul20;cS?mg}K!;{aIOJHeUPKnG7>R?PG67Vb%z$ds1=HtF_e5`+N z3_Jb>5~1j(<~RGYvR?>XRz;YV+J2`QzEJ z4a_m@BeCDZ8PkR#@K_k)}8a7 z*XNpdu++`R^)fcoXRZlrq0IaF&I!Z60@E`rZr>c--$feDY3S3d)aXQLG+6h}qlVe- ztOs)5`qwq=EtD#E55MtHPca!)*=S%MEA$9J!DnD&Kq2DU>0+QbS%NYcMM!o??kL*` zE^+{L&KRFt6!Jc4cICHmc~XJFFq9#;|ZM<3*t zssRNTY==AW3MoFcKc^Ozg-D}#wWzRgrsCxW(%D^5WHfanm9}YP+R2b`Y~X!hNJ}23 znf`*Aox!98BiG~Y)S~M|<88fmGc5tPy=;Cy;b@bv3#ZdcM(``{8Q$Bidzp`$+6%tC z&Yxl*cdZ^}Vh5+Ru|m*OjW&mp*BBp8JT78y66++e+_l!=BWiLfgXeRHWg_?=l|^IV z6WOs4&3#Pv=6(kRImV#TStMGFphp{e16ChUc`rh4Hkb2ryZ6XR4*73rPFk2RgQSIau=waHGhD#cC(6-*sFb@@}2v$ybLC zBybBS26X}hm8iT$sVPm%U}b9^RAFd*8IT#P=PVvR$|h3l>4 zd=>kNDxIBY?Mdn0aD3*G@$5c|KK{@3=Q4iUipCGxneq_Jj|H(t#=e{IeZJ-~gMFVXr)-!H&!>xume3}QbRvSK35OjAUV4)srPERI zh#lWIYg=o>eay$l0Ks0~1<8|^ce3dMI$K{2yY0XWNoPP9BM{!z7Lfl6m9b>$MF(iP zp@n!wVd!5)(V3q4hvK|8IvMVLhS}Gef%2F3M|T6YLC{4m1onl#{dC9}J!pw}M9fk9 z={&)V7A9t9=FmOmnrC!y(SEgBq16koM*r#>Zedr+%3dbd<-Ub$Mcb>jKr`F8=$h$$ zJ$gyY=gswXPXBMENKAw*+q7LAt3FpKkkegdE8Fw9EIV=z!t}1|4r;wjlky)AZARC9 znY5OTM-H3db}2r9B(z^;=V4H>@;AAAHUpGoz}>lBDvRlz@hu5tYO6zJf*%`Z^=pR@ z%T}3oo6@#`BlG&y;~1O#q!j6&*pV6StrN@CqG$>>27gcK^OZ%Jxj3b< z_sroA9tOmowVv;K2U_E)fVbD!Ewo15?em61#^nUt9^}yq;K zFkLoOhSsBB`C#22%)3}3Q<~)ulTF`+u(%ryq$ts7!6s<$_@S+|T}wz?$L67UnZ|Ny z)|xwf4E}iU;~&rIw?sbH^u7S^)L~wBnY_cGKq~s@wwgdO`QQNurv?U7g7};DQc*iMf+SjtBVto25oluWrsNg1HjaLR|LS z?tg{Em|g|XKqvt?(m1lLu^^9)`6EZdrwN5FLcFC1@jr)4RkW%F**u5Pv~Qc zt%BL&`@yPoXO}#o3A+g!!HO)U`PDcW16yqL5_!tLC^>0HygEoLc2ba`C9}3qnN5y^^T;y1F zOogZ8k!$W5ZJQ#$BiTEQUB|&szuuW0{3X`%A*Si$T*>5TU8wW#MUY=w#7n1A)zSpY z{xJ_^)?`BOZu~eKCFoI!yR7PLmS60oa=2O7s90|Na&k$;PcNyX_4WK5*9RPEX7o4=No$I-RC66m%){;Nm(hgY029aayXH#)*s#6Rx6-gTSf zoE#t;pLvUPY}>yqGD0KsC6x^|z4bX?6NVYY8vvi*Z_UGn_6oDyl_hi-m1z>-*5`zw z;ZQ2SdB8{SI3*TNKp8W?&=zrE^&H=SQB92sErZ5B)cSl`-07x3M9>RqQCkO{4%?L) zo9F@}Cllsyqs%O&XK)MKh>j#%5RNoD?P5f=#~ zcT$O%;Pxa^s<>(xH50X#@>t>2x~e*@3tcUTk;s1Q52J$5dhlXy zk26GoO+7wVS0a5&==6g`lx!2R3!i^xck1hbMy-au_a_IJQbx)Id+$;wDfR@RJ5R*3 zbsx`Gs?ug2qGCB;L$s_S<KswcU%T>Jx zm0!a(hv~j+AZ|4Qb9e4h&U7P;0tXw`%A2qVwiSUF{pEAI5fzQ8D^N;H{H2eY<0IWp z++UrkaJ}qu_HOr0q5d{I?F$&xit4!YkJGK)#8K~3{rbV$dh~5LjhStTFdELL+0a`p zIP@NI5IsPEE*xudhDI8ozvMwT4^8t%xFq;Qv2zvPuJJSGFs9(7ckX9ce5jgMc8_@EO396ySD1I=PSpM$4=DP z>ZxyF0xQYXI+`V!nv=DFkm;51@Ntsx^#F|S0s<=DQbmigLA&9t__)4vj+ic`HDSa_ zabE2;ltGbB9iP)BC>fTdB-||u%(fF+lGV+np6aq5ymeoMFPscCsTH@zFXVG;; z*Sq6%yOCk|zIFSZVk_2+bcMVEsa+{4V|Bi2-rf=F_ylgYiBq51a72vz;l+c#LxQ&E zloe&7O)%=<(-aS&XV^;5)bFhwl|&yu2l6-`Lq-lOcdmUFzM)Aw>}1crj6&= z6$SDyO)4*0(jU|Kq*W`}W)Dg0bguFKk226Ja1ykU|$9C<VUNw-^?_8EWz?H)&;sEvd_LmIt9Z4Q4`p3N1)e~Cze{kme|l>bhHLMCyGugj?` zfK%|Kp^GVQAvOKn3sG2jcOLGpoRR-l>@;x+Cz7PyE~$8rU7Nw-=dxSE5fh3S62Io_ z$gB`$3v2#Mxv+TlR;2qk{vu_mCwC&%r%>1JTMVidbNd}=3}I$*%cXs3(o;F-kXRjH z3_uto?d!$9Frz_y-OT6PUwf?RBosbKeDHfw{Zo=btwuyF7tM3Y!j;O#YV4*EQ8}RO zOf^u5l)*UN{XI?lM7OHd2vJAp5lqq%ZAVK>0+cRXJ)x`&Ocs;jG#9GSm-Yh~NT`TU z0B|N{>pL(!CYljNhK*Cc5EB>ZQi?WXqj52iuRBgG;;X=*D_QV*0OoNY93F_}6F$i| z?zq;HwlhPa_zO(C)TKnw20p(@Fzwg#kF{zhvZykaNwhx0lS!=(&%C7L(3npc%p^u| z18j~9O((^-8A#nV&`?P@7}}gfHCGb^Jo|fxWvqpu3f`*(sUo=21#;MtN4}9mRW5?p z)~{Cv6{EF5buqm(YW%X}vN1L(H9PMNZa;uktCM88!r;A(t+7CGdlO$&_`vH?@cwjU zDz^?_{5ip4m(EL{xoq_thaWE8d=avx2Fs79sgHVVEFPz`of2z@kNy00{W)ds=Rw*? zkG%V{_1fQ}H^)TQS5x7_FvPG2E0=h=+5gF#!g%3^?6A^W^v7qsX4SsxIU$LadIn$b za$PySGpoLJk%}7Qg|+nEG|Rgc(Px-byd^v_vhij)G1PFfTL>JB?LdG{R<-j!hS4bv zy1|8=3ra_!cG+cZ8K>v$hAz&d%&rP)AcKEP zJI*mKS6>8o<3k=3fh+qM7ie+t!YS`qeX^`9+9->o76*Pt%D7El&N?akee8~El1;?U z{ZhI8=gdUfwP=$>m4NU1&CoWJNU?%gk0TxfBpV6Z#fhwLVp?u?MY9P^Tb7X_m26|j z(Vba_y!r5T!ZLz_H&-0fbR2+Ux7zU%E@Qy3DSpE*_t9$cR5nMU_2rt?fb-@vRPKV*1vIW}Kv-MNfZ|c3{-&!Ah2BhLBL;T5 z!u24zK%n1)4#PQ9WW0Ci1`JW zCyPt1^q)|K8iWRAsZl>|L3Rr$29C2pX{w2~PL394C_%4?e}j7d0=A&MBmv@SC9mE@ z9htlrFq0mNx$FfD{w-r+P)sVn46D++Qc0+2p~YnIhaI_zdF~6VvHs{P0C~*^V*W}Z z+w^14;`cfRNJ}(e1?_Wz`kmU@NK>h7dBcvR;OP=xYo6N*r)G*$fn{d;j!_rR{qPsuZiX>XL^ez#j1E6knP@&EA< zkXHcegat9>lrM~827dwW-wm@!<_Y;rwar6PRP}19z^3T88>7&#B!WlTW&{rYL#8P) z@qc<;-@nBN!S9ZM<`_b`$;lfFr^@v!@E7H=?2|=R`;w3z&rD9XS|TiNG3HB zD1g`RXNQe}Q(-#)!9N6_j$XYwyx#GkZ`ntDeQ2K>0AsZON+NIfm`z>qj#g=!nVgto zOxM}eKRG&hZvnl16xAnyVgD>%J~J&%AckIRIc1_yS-C5ih-81JIyz}i?V;1jsd^*nkPGgeOR)y-6?j=B3cU9n|@{>F@{hjWn;J1B4MmdjJgrzdcMq zr(^58?Qt8q(f$&Uj!0y;tYv+U_r_WYZ%*c-$)o$fV9q~F92B?%3#{|w&=0!dvqbz& zdG_%8-8b~LFC|xGk~yN$;H^>p1eb>qj!rAc`uR;w z_W{vfsG}>zF%%4|Q4Y&@qJUnTAQ{J_1a|*Y-+42Uxb|>=3)8NhuO#27Mf0M!gYX2j!ul?o2r|s4DfXr&`7E&k=_Z%_l=iLoL`N6%)}G7t55cZ*h0FSp)Cj=PYc@o>L~C<9qSPAM!lPdsJ%mYUiRhOE38|95Uf*z-{J=& z*kf;D2DsZWOujXS9qAwBmnYE*l^p!ho&Wu5Kp*rJYL+iFOHAE;Cxfrux~?>xuvZ7I zth#GggMiCJGLwcFpPe`g4^HZaa(^B`7XrTHzj<(n9t zA5RtvC?>AeQe`mR?-fyX_FoHu8?>%K5LWv16^7Gtp0X2NBcHcU8dmNgq5 zW9zCK7TkY-x`H0)V;%(881?JT{h= zoh7(hc>&9n=q0hCGzd|W9GN&#GVVC8lj0ltTSi5w=CY{N!JDjp+WAwCa~X2vw--%_ zurGvrr2l(o`TMVzUcZ4dLmCqnLe^2qYf7U^P34tY>4m1E9lx15gRr~jF`}e;SsCr_5s=_N=|K}Sc zdr^Xc0Fg&9*+B9C2tv#J0LZJ%FJoQre=RRSxGe}!Q%i_DboqII-uB|l$x{?eVsicOfBUbmltFung1x!=^GyHuJL)4n zMZrk#`n!MYxc}N7f$sY34YU%#rm6m4Y#OmT@R7voZ`JtzcdxZz_yVnzk~UP9_fHu7 z-@lxG`p9Xa(Gy^V{A-)|=RH!0fmS*KIm9SV{O@<{!2&+A2kTo**#B z*3OBBB25^1lue`t^OHCG_5iFZGdeym4Y+PgV)3C-p1jn^Uw|3}k8(iihLAzXPQdLL zUWy?W?Off!&Sh`PxJnT+#F1#iC>gVvCDn7)$ie8l=}@vCl4`QnSE${rm0Qqu82uj2 zCcR)ktS4{0&|GIc$HvA5Z=5mOLv#_mLRei8DBtyN_LId5g-im2B0r@Ybvr_pmJv9P z{GXFKvX`lk06=a(O?`b&W@cGw%TUQAW-UU#&r~jsL^-T!wsy7|?)Ntzsx38@4a+x% z`%pb@U`<9V-epdZsK`zoBxmZyqO_`idWa&HjnK-he?dEsL{bRwy*hz704z`T`5d!9w=3jWfMs$6`PkD*%Yp8I zRWB|Y?+cpu?Wq6s5u72^(xvpy{;%*K4`S`G8*Ml$@%pP1ddd`&c4S!-Hv6OGn8DYG zkWxu>cjwW~9P8Phe~lckJZ}N9Iab`NZI)W4!O}P zw+K#?hhCO4k6~z|hkDsD?FQ?2r}3nygnUr|$MF^zOUdEObJM3~ceBy9_cI|MoNQOS z`8_0nGSF)a9#8iB1OVQ6`+Z~e)M7f%51D{dg~RsosqC;}r!&CEgi0LtyXCBZnza2s zmS70@mrn}R2!aXOg;Fdg6)YEOsDU^y64nK^mUa7qI0=Wv-M2yxV+H`&Y%k6@rz>>x zleq2pj%nJWbjPLGS{xNyY_68%l#8DjJ}Zywrhz0DO+u3$;;bjIkSNCm9;Zm7sS8o? zL$rLp68)>kIh^g0+~o0`2r>YXiXBu7(~rx_m;Z6nNlCBvCKlUJCbCnTG1A@iW}(VW z-RUexR+^BXJRj3&W<6Iql~7t@5+I{&&->V>n;f$34@TvlA^n9$b!=8j-`abAwBFxg zyWZ%}{MLI#Ey16)L(5vd{!0DQK?C#UOAe_+?=Y-BAcEq>Cl58=DXnwLdZ$-we_XaV z*CdW|5JYGOK_lxzj%3i}u6t+(xG0RC!8kUlD$6+rpxU&J)A{~l|2oUtc~xUFz0Ux) zW4p7q{lefj2r;s|?m-1`a{~vYUyBS5`a@$B8$Gl9PV>A(JmzU+Omw765c+{l4ILqbizU zpfUjc?3=@=dG5=bZj&ibk+zjcj#bJ#lqut~`tvP1VC~quIRP$MEMjP_V6!(59c3#| zRFj-g>W80f4B4&PvTFAeDU+Ov>j-jwVI(zB;Z_sM!|u@FdUSgoxCqzR0T)_n();63 z1M@>627@5yV}^P@ZQY%%(xTSCR+MazSFi#m08hj=)?d_SSuK`Hd@`R_eFo3>Y_sW& z#H-PZGM8>G)n?~N7WR}Z7|@+NQ?~C2^QY~ZUN;3RIe&-81)rx=;3YH|17Ea3Tm5md z!A63=$Fcr3_c>U&op&Mi`~=x#JjcsD%JLJkn~Q<$#*>%5;KZA;I>ddTor;LpethCQ zQ`ztt@?@pi6hZm9EI%@(G$G~wS3g|2^W#h7Qy^jk4z#tO3IHxBo^$vK`l&YG_*Ppr zm|R3O6*7mwf(*CO+Aj9ohUn_Gw~bG)F@SXs&mbz|zA>fh4ok7Nc&j`F5I&$&6`aoJ zX6(66&bJv4TQ)mUId6~O4XUwe@{_*I(aaA0SgCcfH^Z15^)vN$M{=1AdnbK>+B*;d zl@R<{_VGBf3yiGS`0*4d9SM>`AR+Fxztf)y+rG(b&_mkOy$B=X%^m2vY(t-FS>Z2K zEb(W2l*63#987N$XMT5I8X12oqqtG>*upnN8gK@gU>tOj+u5qolNN}ZXxDji=>u0* zlBz4jMFU=PU%WZgvG8SBO(!tS2SnJ<#qK`NflOZoP`Dg~;4HWfPZZ;@UgHSvbZ8v; zns8on!D*1!eL7qLxnvxl&SZzVAU9!G--kNk^U`^tw`Iq3F{)yxahQQ?$bJdqlvrK5 zUusd{kmKnwklp*po_u>I1qhvK-f_3ghHY=zn?I*m-rewccLT#QF2$~Ebp+jKP4a-jm;acD{^v4!O3FRcZ zZ=K4Wcit7R1N`&>XF@Y)pS8;d$JLW$@z&VnV4l`G;bTQsFKd2-%jJRQ#M{k&;)#=F zj!z$kQt3y`8?4epO!QhV=jFXuTEcAqTetltHn>zB;6iHG6a*^BtWYH|BkU5J}T zc%BO}KhLKXcziu`ms`czoHG~@MeQO{nG1_>8Ed7OhytsW6TC=i>b_ryi9*G4(ZyZ- zwVHUoc{6P+s}1LefseBpS|?2BddA8^wstY8xqQ@Vu<2$bKEXhxoC%OZUn=6hg{@GoVN_j|4DP14B> zB9+xs?UqS$4<1uqCr}4~`2gGcU!>%7N=3Twf2Q5q1VQ!;Fx7P7TAad1hF;j|>~0X0 z<$jh7-uJX?`A8AYDG zz{x-39(WwGs85DMmZJ22<0}>{L1%5v&E>qMLLu7y9xwY|&IG97y5B`_2lKw#Kdtt0 zzL@Vzfjn36w?b_`mdTWL38h+zn;S}xv#KKDBkq^)`M5 z-AeoLQ@` z;aM-h;cvb|q|R|Gj{rgw4rbZ*sdfnnCIFRCqQT!9W(qzo)?Q=*EkX9drB6;GKxu~% zO0F7UOrnjgB0Sd>gl?5PsFFrKZF!Rgpe(p!upml0INA6(`~^%4lb%5kEC{9X)ekD^ zizNZM#-5iO^pceGdK%{V7tWPLP0p~C&8&4_c#dc z*7r4`+*Q^tZ-G-3W86DNndArrFtLpWz>?g!6-yNMx0+`~a{k16Xf9A2ALz&B;m|gZBt*1@^ibayOQrDXa z5rZg9u+Fjiiz;%WE&NypS8;^Aux$m7>y?i$ky|)4t>yu5?A^t_tXq^*rzi=*`pyuN zY|Y%*wb6?N)*UQA!7IY1HkK&e6=B>5?CztNdicn0n%>B0{Brc zIWKt`z!BbSn=q5)f*)hUAQ8pY!OO2;@Ct+gKnCKsDRAZFRP^&s?;;cu9ET1W1``v< zy<1UVVu-N#I`&|1vT|W(Jz!W*P;G_T$$fmiAn#vqIsYRWftjFr?-^|8EB>Yijmibj zHoLhn^^KZrYup2QCFxQ#6{krYZLvHyrpW8%)3NwvX|vp0`S6pSHS&*X`d{rJL}8JR z{71ugR)MMMxsy95QHTBI|A-1tr#Blks%z1gn<1|om zTbxoxGAb>{-p9C?lj;&K$`(G?`Re0vyj|`ff)PYPL{FszhekBm&c@dgO)Dz64q3F! zhQ1oPxkVKks>OaiF!3{`Y#*N@^GY8BALXk&$^m@bX_0CvP)cDpSP2!|H;oPcy0Z|& zq+O(>G(8}U+_KtuVlLi`3`F8dG6{3W*#?#gM!YL7);@K7Peki{*U?(?qZi*CnAIm&M8HcHXYAsXaa;eq!ymb@rQiTk1*mgJquK}t%-`Xkg zo-CZEx~!nvAl`p%yomK&Oa+rV(d;cv@Q*GPe4dryMIF8lC{N~B{0sZJ=|v_gFixbX zU${P#iUHU2vS}8e9kia2p0(mWqZ!6sWgg+r1y+=SFxuE4EaC#?D(vo7k{FO6zMJg0 zq|JrH`WIppT2>f(ORCV?)_ZA9ZX0VaY|bpT{C;^pC%k{Vr_ivy3Xni3xzyA@a}nt( zEziYjbD>9^Y3z0#@xQ2-+HW&x(|y$*@sK5&5L@SHUMIl)^_AG0(X`?r*||tAT|u5x z!<1%q-|b{r!()8^H&mIs%}wJaUefY(Fg&HV zx}QlHyya^?7+<2IY>xG#h#>e+eqz$iSwxxiUwv>Frk3<|`!b}L9jOttZRcL#ZU>wJ z@eYBH)7}wcY*Ygx#~Xti5jIDDs^{b8Zc$n9#4&l@S9DZf>6rm}xVbw|Ckie;$uiVm z*d1Ga>$GhLcsc=aeXl}RLPyL-Mn;cey$>_FD53C$5J~sRe03I($EogxS&Ld+ny+)B zmZ*}j2c(rT!J4wV6XYReFEZ)*T|FIi(lC%)UkcUu6TcRw^16Mim|vJLr)t7mic zg~i?HI?CW~si@Nu8q}okR+m;p08;K0g)^ho5a3x}X;TY>yd>Ewk@HE`vF@HET~>UW zI`G~4W*?_#96Lj9TifAlTD-v;?FZ~O)`r>0AE`t-mj%MTqmgYCts9AZNbA)-2|fz} zvxsB8UUKihG63r$z%7%`oGd5B>3$SCxHB?8`Y^_k2ZK<*t&c@teu;70&DzJUY)YPu z_e3ok%d1tZunxS@ow9L1gQUC0dE8&G_$u+`X%rXIp3}9Re^}B5stT*X)u669sw%1C?k0#FwndiV0G|;MvQbs7w9KEQw)_K0{m*6_+RD@)7=|%OZGmr2~C3u;lJ<4id2qbx>H~}dgu_Tr9>mdLd)>_Zea8bW+?N=@9o1p zv@oZNS^Clx*^>!Ftd<_%29-p}Wn5+H9)L8uu1(2ae>6Yen;*0o#))>EbIG6Z_N_s^ zy1@`ADmP_hq)C5KMH<+Lx~xVYGR~(NpRCVKhK36IflOr97G~fr;at9xb4RqJSu#C%XN zHdS#+1x(z(uufO5B^XpPiEjVuz<~aWztfZd$piIy3xtR{Bt+XwGe|JJThLYKq`RGV zE-FK`ph2#Dyvi#$DOKtj?3JSS)RCZOEl-i7k(eKi_NfZB*yQm^DGIJ}Xv4>Whwex4 zC8*&Y2~}_#h_POH$*#%kJq#>c8SN02*d?bo-AWX{mKa02uugByUwAz{EQKxFw6PxU zrVievjyIG|4`X@>&K>vj(2U0s4aoDmC;TQ1E||Bl&=iNc>Kcvf9DL8=btzj_BIV}T zD5h_>4`L16OWb+s#uo!$Gs#i0yc*(j7p!yO*`mOj@V7T}C_70%={wVcok!0}OS9+s z2H0}X{%o<~1VDn=cHNM;N$r!ZRI7M`z-IMR(sA1)-OJb!>6n2dz`9rm zfFZgXiWveO*!ZN1o8~c`BC-IX>J@87>RKvK`Z4-}S<5)e9D^WGkN?xGYD|p@_U|qf zz!X@@4I+_+gH+P|NzDpi3Z|tTz_99AYgc1LV1T!<8f)3uJV!&+E!7qCrpp}cbS0MO zxi2Jua*rA}!0~8jGNx}2)q>9PU@n@GPxUp(fSmEu9tk5=wpeI=_5!nIbHz)ncdj;v8Jwil#iv|dgwJVcJj3;UKY+%(d98x21oXRMTE_f5Rlz)(_a=q^qYO> zDyr}Q^?WE0V7;Q5cX5aSvaCH1jhTZt#TrdCaF+*W8eVR;CmklW`9i+F0%>9)A|a-r zAq;kaQu`;-{EbwH0ruVZQiR_x3uuI@-~x?0CDZkZZ|<-zu{5@0brxmrgND_3&}~p# zKtaTVUv*Lvj%zacZ|LVZz2HOs9CwL%VGK#a3CSdM{~Tm6gInn5p>@V?ESNu@g{zR4 zo-btvDQkGP-qv+40h(~euxE;;OB+k}aA#lpi+{%S1Oaq8TFwDk%gmqAj1Iau;3BR; zcqw-?;uX}aL9sck8O3++(g*CNYs>8cC+kLweU0^(FX)~1Nx&E?zXE?J$I-;n#L~G2 zM;}-G7e{x2hU)!(FK$1ZI)*N@uS5Yx#-EcR+!7x+AaG%E8XOfP9?7vGd&TuJ+DwQ{}0?rN+VY-?ac% z)=Et+XWutzl)Rm(Fr#n0ISZFrcpPxk2^b3~Mm%HFzapM4F^o)Gmk=w!1Hp1T-6@p4 zR(x>xm}lo@c+=W+s%O_eGibZJQJArh&ymqQ>uhm3l-EosJJr@EG+k@qIC{ZDkDAi8 z$WygSOZE9m!5+O77R>cU;Lqan`2%`Se}8l$brS~d56x0K5P;Zj;pFSUIiKxN4BGQA znU5~%!mb;6oRFDJ*YzMkk{lzmhHEwf`4g4j`A2d6#TF@`2lTwhTevwk8?Cn*e&HlK zPa8ojfjyYOPs@L|jhgXpj6$`K>C_xRW@zVM4%a#C*2UtzF$LO-y@x0LwUx-A-|KgJ z^Pyvx&v7SXfx7`a`@%$QPLPed;#)A!~pwV#r|4!aZ4H_vx!QTME8a&~X}R6Jdh z>vGfG%PpTQ%QQih8(I{qr+n?i<*PXv-HjuldfkCw&0DuSnY7E!UXPKo?8T928)G{3&$OW;Il}wg%M=vru&C3|7x^L z-CMLHHrNNsi_!ug5ssIad)9FwWIWY24hRab-q@Cu(-5CP)UoGl-NYfg_fIa+-o@+b z#JuY{%EH;I7_6C?zYB#vDZ+Ej9*dcRXiP%8Gnr=M-Q!vu9&(qW3&bpjQjvvt)%`^T zE*f(Q0-ye^Kh&^vrbl962~`N^}FJ*{q{vy2W~<60?+Tv~ir_pVcXVeJ*zDRva~ z5?F4OR*!2Aj(8G5fH603`tHpu;2N!5mnQnS&yKr4RWBVZ=|uGT0GF%zY{OWl)z7lp z28c@n^B48ZVlh|Cf~+gJ%V+xiS^E(ittto&N28|)^M5G|2^I_rW2Zo2-*+j`oB_^= z`pnE?BGh~Tg1&z3>L&(++c=SA*)7Tzy;Q2j%1#w}sVu^2BG)P>I|{Z(rNW9%PrZ-gk;wG;x#WZDbd^CiP}^1P!yw#3da~8S z7Mp5za0St7(*PUol-WE=q@?rNLDuQVnugn5)OiO>%OXixJ2%$Cq*QJ@tr*u+%^#`R z!%2CDjc|yu(7p_MU18y$_GjORW+8O*aupAyT=RWVs^igY?eyq*oHi7lkk;?o{-O4o98Flr#wwwtig$ER};vyn01|%~!aNpY7djAFs1I)YkjcrLFF^6gQj7 z0g}LDcCeSTc);FF%&k%g@#9#N9`~!l`&N9xoHCgy_8Zt|mjstPd6=E2jM9^zDke^l zl5as~5n)Nv3B=bAmSaUBczQ-`Dl-%gfx1Z<={fG;Vl$u;vJHa-Dyx9{lVTJ!2w(~Z z*VVMivmuORJe1XzGs{GxgsS~FG#drf0BU4!mr_Zo>2l{%;89@Upu#dPp0tQHdGT&$ zQ6xIMX@GhoCj09^BD3k%VF$1%XCyuwQ6bt20z`rw)LS@hIFl!(X`TIDSbn6E)+i?@|^;_u?OMA^Kz_Krk*51a!y z!Pm9rfL}(Hda6h>7O@ZNNQ>Ef+2VF_VGaNSoC*`HNHL_ z5E?Er9$SZ-{f)h=?1^sgMaTI0-gbH2q7C~c`r())fQ`g&@r53%SGpEj8rjQhC0zcv z#p!Iu{i?2|Ix~oHQ1dQ*J%?E|>hY}~R^<31t{A+~tu(LBfVV<(Q$l3E4!n1(m-IH_ z;2GvI!H7yH= znsNjfp(1Du(-N5OGzd}_5|J$qZccPvPlY@Mb}I%l3ImIsmeWt(Rn)9o2$!tI;I4rM z`}u47nVqNlH>WMxB${cE;K>rOgUUElwhOl z#r^_dW!~*CrCACjU0ejKe5ovTKc|=~9;>iw!G;ERXow4Zf#@tA+#e9GB6=ao^)##3 z^&vi2jZN{e`RYP==i_`L-x<8K1QA|j7cT9{R{f4BW zsMKBOCJfCW=ov0mccFt_7Ug7O4kxEgpNh#0U9Ah+xp|tm=42%;jkQP*S~l+k?_@fx zT5MsWV-JfIUjLpGV~+$E z_{;*%>$hYMiZeBq<>pG?Ke%A}4!O8htE+=rl+FX z$1(9OQKwhC$GHgY`c^$+u;pIN#KQKyZUg|q9knHSOoH?sU}MJaMwVB0y}CY{Q_xW6 z`>HzwPFSawb^^LPp#hGFG3u0t$BXLG;QmX8sn;oo{ZAJ3H&D(2x1n2{ z3kDtL&_d1-`A0Cfsjio-?`=wntV)x9bp7i=nTIWlONJO6!Mgn5qu$kPZ}sj#JxYxI zt=xM!ish1_oYn>9wo5b1&pk{}_1OZb@ ziNX^adrv=At%B!EAMw^x2VGDY&{&iFz?WzPBym5iyhVba85chzY6<0h+oWTy3Wp@u zfuGIbZm#F&E&JWBI-2_7=0Pg8wRgO|I!DXf$ z%bEYeeh6VF%yl3R!{x)xez`Aq;e!vM=n2u0!FGuk}Vj0(HAc=U*n?pfKxMDm*jT3NJx< zlasDIoy7%MME}4tX<8QF^)HlhY(*U_Uf>j4UOEJGES^O@ds7pLl;3yB#{hQ&wCfm@K%<# z^Hi)KE=Ubzv;k{2S@cCyz}f@aj1H{?Uia>^VAe3BtH@_Gd>Kfv3V)?s zY*vYY=Oys=5Ll%~k<2kN9?gfW{82kK?NIQ&UD zB8TY?w;gv;shv*CY^6;hL;^@Sgxsb&HXEfI>@X4}bop%}_YDHX9AfR;dozEIl*nFv zB54{hUfefKFJCtwT0Q@{@1Z~iwY(Z^RhgZyT(r}@Iq`z1;~}2^gb78D{cYtb_c*Iv zPYHCN0A?rrkqO625tQA_tQ?nIpjVGd-+)Jj@86<+NIL18ImqF{#_ImKw5mO=sT{3l z+DDYpz~w}av$IsLzggQCE@V9Gy|xQ=x`r6?Is72ku)uPDTyE57n`sQy%*6?~{b&sa zx(kf~L-*vfbt)f{)@=o!wuk0P9HFp=-h%w%gH|8%r^D&a_?vb*w>(_eE(0#1#AiMW za#FLoNQPICkhpJpOzrqyy+#ruGRqb>2f5drYu%YL-Av-g(8I*SuUrp?S$fBsBlqHf zhC7#LPYtEEJO5dal;OzVsTQwU;2C!*C*&o0!%Zb}>@YlWPE~{AMZ$!%bqMj=Z{kHr ze&n8f1`tWsST7ey$&wycFR?fb8v%k-8X~|0U7-F=73{m2Jj}>(c*nc*9By}rtyH67 zBy^YR>nv(RqG>}%a-^Jt4hm>fYVN{!QJ4yvgyZ>(82S_%p#Q`a7MX^wlYLrf#A7F{ zJ(woFg?=vKoICv&vFz0iR75*qe)pV^n&s=FBgGn9+_h zqFw2EO3x!eRn)b712qZX;u$fd-DeL%^cP-u>d7S@QXG)y8|B>^jr&1gV$q@T__W5Q zLY#u@(30qXrskgXuEi5E=5E-IMDiTCwjrlF=nQ^!IN+azsN^htd|45fR%EQpvTwDZ z$H$Zc+>x6M7VCApiRTmgg(2!P&Kp?Qvc|G$?e>*)1M%Vh$^JW|UEuU<17o?%v9svS z@+Elf)wumwtV&4BT0mg!TVBMafLU1IC7qhC+L|BU6ACY@i9W2ime<%ZYx40h%D9ZK zYezWUmf1k7)#qgeS+((gbk-5{R>TEu)PU&k4Ml~}zD7S=C8?3F?YQ_i(C9_u2>^9a z=ux)-p^Jmdn%2vz-FguoO8g;CJAHy;gg_E=BN>zKEyE#soxzOZ3H8Wbc{dI0HCx}%(zJ`|y51S4d2%(}5$u79`^eky#gIjl6R1;zGyz$JX>Ie`#Z&6qFHa}R8iQth zc@6{1li2!M`hj{wn^i|>5=k8I+DkZI@lC=bQk?65fetSSVhN?b*zQ#QIEnPdk#ceW zW^3e~s%|*nz>Mmt;eb8(f;ljZaOVRLU&Rw7mG+FVU(lOyM~ZuY#PEh)D~F^7M2wLn zE&l)X`_O+S%>p?%`ZucDHR8x4WVqxmLl0r{c$<|_>9TTv< zk|P)@YC@ATb>q;yE~dCxHHC)X0S(Is>_U%!rN}ln zcnZdLf?Yfp<7gF-o_c0^(96`meBF?ai8{0KmFdHUNso}=3xUVS79Bfz@_bza(*)FI z`37-QYGivGgi@dEq*W^bNSJkaq-Cu7dEP541^Qp<(ghK zy{6CFT=svU^ypQRqrcR9;RR}fwmxw z7shUA3y#Agv49sj;AEBfWwIGb1j%jCd)(-QJIX6N?BMw)K_gg#R^Ky|VxVwrEYW@4 zDzGb)&FSS4agv0Pcjz_!Du8b2jB`~K8VZPld>IBaSQ?zRiOn=Ri;ao08y_Qn%j%ys zU^+WmST}n8?=aMN1OyJkF*CJk%~}NW4)7bn4q+riScRj1&?9dUIGeZ(=K1#ZSR9t{ zZ7B)q?!ju`7lA&c^Gl$(ZJ|n(k(mqkpdPofY#Z5=7F+PG@S~hChrV&MW?@acc{&-w z9}X>_P#@ZsTo1o%_OOgXbrUjhQM~?S1+{&5ew@kTUzw65#fyo?AKzb|E%q(W)6a+R ziyCKURQDHojx>IqGIACLgLinZto-dg@#-~ zU-H`*uOc7Io^cNlZOBt2AyFBu^7mhAO#O*{3p>63tFZ0&!!lMn@JC@zdc5|xCY1VV zuPYAM9|&mbnTAdw?^}$ay6y6lb!xo)F{Xz{)x$s!X?z-vwg|g2s32&n3jM;;Sn#^c z2ni_x?d9-05#0<#_=fOqf5MLDAM_XxOv(HXG7vZ~E7V6{#=J~17reN9$;?U+kasz= zp0M*pil_Tf(pd;sr;D=x87BB+Iys}IX^sWfINK(zyiit8t55_RjTcb%A@?*(I<31D z2#vVEFjhu;AlYjD;QvS>P;o+}mjk^S^ZTsa=ta0vn1+rt$(FpRc!q(V%)+7tf^C!0 zkjW??ppL5dO?+7Is7en=;qyL<#`oE6`wkPytVH4Wv1;D&PYncl+)W(tP??_UwV_?7i=v0qww=X2VRAXvB!z3T+`_V!}4v5AHjr?lBs$&CSm*a2L&HDHZn9GnDP&Y8cKoL7nUPVHkQsB zxY*Wb9R>p65J7P8sKRgv*uK9%;z=PqLkwQ|F=YDhfBowVO}meqK(-#&T@U|{U;O$% z+GxZVsCeD)GKc<*^Y@T`GnA2@9#h@_{Ah_XeS(hs_ZR&#UV*5n%5&K{CU)CEkrs}I zAMQm0(IDdd{G=-H8u{e6aJD&UEAHJ*i$5O}05y+XYHlvHd}55lAV7783$52>E%_=C zOQ6Y82~cG1b*)Zre;l*&H0foRn8iqgQ^oGqEF{xuXIOBC9m(!xNqv+7f7{%|3DJ1Z z$TyWjdjoz3wXPBS5UqFcpP;Jr_~qZ8em`~In{J+VG52^qowZ)&)p@>cJzEy_@Yyhz z!ZhJU!J5=pJcPd|0w-Bm2ifmuCb9<-N3trAy$3OdZ2io!FV`4%ke(E7ieq)}ba-tWoheTUDMP?Qa{xor<30#Xd`kTe|Cd zP1Y6r{o|Eoq`fCimUzDLc$V9>(*#7nbI*K+ULo=SD!vRdz3O*EvrV%Le|jFCDQFyv zNw#$7SgfbngO6*eO{+cXJ4KCm9(x$Hi^-KE^Vgn_(4RdR!+(rm)B`S;jyuSurDzpr zBmbjL!cmmi$`}&`rVw5ZuTH_!6$9su#dk?Btty1ZWlztqIyH7{${9I0#+S~aD2VzT@FVTH?C!y9`7uFHozMnH=5k7y~W+v#s7+MfCWV#Sv@Sv}S> z5R<1|Gt<4_RUk`lmJ2nl;{`gf(pd)>?Fg!@6XOFq#f%REJ33cbdqO*%w5BFrl{=z0 zy!IQp5RHoIa8kjwQpad*`%cb=UTXoqD{r8e;w+n9iRnit`@K7pf#mW{Ws`)xknFu# zu-uI2eAg{RubA0X!jvfAJ8Yuq>rH{(NT6DVnAA+EMG?etm(S(YGZ%B$lrb=Z*b-KE zkh-4kP=D!MS+NT%;nGjB1R%*lIwJOhM%4n9A+{jPA_eSa1?K?~Lmx_5UsuNJ$3Yum zmYI5oqQ0d1K_Q|VlM=#^uItP_0&61j{XBw`^1T1ql})>-v|@(ENnSz4{t00j^q)b^_1Qey2Tx8e72tBYvaU35P}S z47;VyLd1ff7mDT8xGue5jOl4rO6|Z3UDly${9g7V+e&7d`zWW)fz!wS9g2NE8jJes07>(y^? z*bZg0_PJg(v>qQOO73PLnx%tGg)ois_C6jg?D|>%&0|RHl44xkG)xXTB zn;2*<+OLnOa;hEU=}p9m_))z3DtWH+Ux%*}kHqw~W-KPp|MayZb+#c+p+v2L7>!1ra@42nN zqRj7eWTaYbK_+Le`+BEp(U-7n~AG zU~ZC0{m^QAZ8tR!OcPpb}fn@HU*3=gE8iuCR|{LR(ZzISs&l#b$s1 zR;_-mkBa@8o8*lt49ET zafPM;%Frc|n@wQ6Zjp%Y$axB%%RF7U%o1V9{`GV`xqR>h3AV)s6p_!tuG_qK7>B{lWG+(DWgGs++SdEveA*z=1;OjxE&l~w4VJ8LK zr7LCmGHF&nDOw}- zUVi9YqaMM%{1G!eI0i?u02T})(f-GU@T;T39uXCWUT25I-$cXT>|@5uJY|_$;re!| zOf+FrfkIk6kyu^6bUBohU;%N^t+!8sXp}&6q7Lbn{SlO%hf*N~=f4$I|4ifu@QPqhIrt--p&Wg=X_8eH^IeVjds+B#a7?~FU*bL|ag zIzPVsaJ=bmR5bHP$S}@qy9KWJM3@)w$?+wZ-eCi(5+{c^~6}FYEj?tj5j(Z#`g;Y6BwtHxKDYFb^YZQ7$Xw+_t?Iu;`+SH4raaE0szpVC`2a{tOYsb69>K zBI5S+-H;3H1`KWWyBMga=&l5kU;E;TxCP|y#NQnR>bjIpL<2-(4ArhL&LRH?zfl zorE2wQK8p8g;g~iw^+=(CMaI@H6A|U_TI_jau)YKr61ESk~`%_zlRzN)h)nuN&`^DbmEGCn^PIkHxW?nma_z{ zhElKxcco5WQ_q|R&R%eV!Oi(2Oa$I=DkV&I+;XF%ib#*V!!BJd=-aFvN2nJlv3tZa z78)tZQ)R#UJ?&wfNP5H7%OV2FYSGT;Ay%c*_vAv zrD3^N^R<^c><$EUgzKg_FpYL)pLAP+Jp_4Z7iH!Lye88UgDYkjGK;vH1RPzY*0*#5 z2LkLBfyIX?o$_KmsoM?*r(C&jf$D1@Be}w#ezhL_jQ-a@wNG9i>WvBvb`sqIk@1?LAyYkJYjLp>w>s!eZ22n2@Sktrg;~H2N zDl)R4Z%PMa9n-2c;&8z>l-tMPLX4Z?EVwi|WZCX)s^q4y?+2DvUqDo?eR+zhPEHLXH4v_$571oO32C*Od61R;9SWvZgmSRkVU1_#YxDCipE8CXc8|Td@aK z`W%clb~0JH-ekKJSbeIkd;LMDJNu0U8@lP5z8)c~ao3gT_TZ`=MO4N&ZbW;?fJIPc z(x`!~9I%)GfRJe6=<(82Y5`*5d#?Fps&ME9m0T!kH_z8^-Zx ziLA~iw{aa&mjUoPAaZ&T*=t&tQ6h+5Q~oaTn+O4!S7AM z6=BLME8a!rr+(4d>{}tmLjiE%N(Jtxn`=u*vzX>jVo6|}^TokA-qrd&+{ImC5w{j= z+N9WruBd4Qsxw){W>XC zuA$MM8*A|ao6<>us!1k5cYV8%z`>-64A^QQ1e0JXb?%G!_!AbIRg3u&&CvmdIfm|T zg1~N#1^lQJ2h6{3ydc{v#HHcGY}xT#)j0-rd#SM3@@c%g4~Du^^x*f|mGiq+wPpyJ zbL5jxKatcLR@KV*h2zn&C@JB^%AyCv>LBGo!)VQGj3ZV_Kp$V*kbHh!h9|=2dnRAg z$pB|BO6D{$r#GeJCOS2{s&m4TDL4$#SGzFJ_b=Y;+y@HvdT~_FJcTff6T#!{D9Et1 zsmiviv8CEmVHw7R^fMcYOPBCdA}I1@bJK$|me7Ek55wR@_c9lL7064z;^3Gi_0p)| zF&|P{Ond-EuMD0qbK=fy#Z0!*Ur>5Vs0%Cd zhTz1rjd@kf(8Rrqcp5o=qn#={pG|_X((mhU&o{`wZk&i*waK5QB49v`bob6Ob#&1K z3Cxbs4#lj;QgN+Q>JhNEN+s+b89GKa!!{loH1j^y7k>G?qo&e^cKpL7M?VsQitnH) zvLCR-<)X;s1fzII!o8de`%OdTGX(0ZSSyncVN~s!iVI6|$t7Vfy)D}j@iB)RN74p( zw{BFKLGuVWFc)#xxgF7e0zoSe{i^s8fwm!AaVQW!uu>Ms;6c}D_lVV(kmnGx(m8kV zap9V~Tnb*OaF#H`q)I0<0oy6M5^Xk>&apX5b#sLaG#?Ldb!hvRT_%-CF`cI`(2T_U zTx)X&8ttE<0mmv$`yyFvZ}P~P1s&ow*1I#qJQ$y0B10GTIsTHk?v|P5lA=({g{fUW zI6T&)p4YlK$?wrqrpMSGeQnS%L2Ng+x|8WzYfI!$^XTO567Yzbbdn+st`!YF@;uQD zPHG1D3OB*LMH6VqiiaF)WZo}2PpBvdbq%%40=1f#@%YF!O1=vAY>O0A`cLh3dGhdt zpAxS)^1IZ6V%$%@fw?yXagJs~KN>qa+0ab`CE06Gj-hgNjZ@toHa>4fB`}W%lJMB) zT3IO&i=0M?=ncy@mgf&&yzhN8Aa{KnT;=<&fc3bbZdZ^JC{nhvfShr<(Vzr^E=~>c z6g%`U_q&ew6LGS4ur6&pBtHo5&=NsYjSsa=Jc+OT%rE8N^!5Eovj^Gb#5^5#Iu~}e z!%{XcB~_}4Kv(7$_+V9XSjfHx+`;U!2*H%qYS32?t+V2lh=H3v8-tr}x*1c0Zv%)( zrmA-tQgz8~+Go{*xJI9ci~S+}b~e?h&r<>4oax`@xsLKphW%6`&zV%;yPHHb)g}n2 zL-Rj~4qWx9G>=@*Eao0fWk;}BkWdPCiY%v4GE zr;wOP?WnI*d4@_AyS-LZz%Em}d9P99xH+?BU=5I#Jr9n#YFQ16Q4i%kUH#A061L`z z$-AxU-N$sC2eMK%33?Gkh2Qv`bE*-z-+X-z{<-T6TTYiNp3dY(V2w1I$Vq3kT6P<6 z9eF(y;i8kd+85$*@MFxRY$?VxxuvrpKpTgAd5mEXf_wP6OL}QF>mcfY{>8L7I6fY* zT}BR`41U4PV0mfj{-w)VEp(GVk^3R_7!G^3pe2|Z7} zZU3%cU{e`!F;r=4)G@MPdg1Q?c;lNCmDLcgaW=YkCU6@)C72vUD^%3Mt>$rD;g>AQ zKj2<)XwO$z3=KPT=eI1!lMSS1BG4?ct!B}Jy74^)%3bvBgu+V`v*c(j#{h41a7a$_ zcl1`$5CQ2uHm$6$#aQ9yf-bYV2I5~)Q&zB}shK}mhgJ}p7n3tv^1Wjo*L?iA8sz^^ ztABalz!zqNfON@v=O3<1 z-_-x*k^lba*GA1;&6Nfw2^ojMhhV`~sb7dl3)VxtjFdYr%sb zs`+|Mqu}9e8O;myRftNS(pQI_zk}?5i-peSx6(4o-B(6KLo>C6)Uz|_*Vi=G82>L$QFl>E1Bo`v{Guyce84}SD~tD-;^gFaayi` z8ink4UTp6|YxAvS_#C!Ef5lR^#)}LxJHviN_cNqxhES_DzVn&^N|Si)ba(E2XIO4K zh!HFbMHby7-Iy6JY}M@#NUs~q)tqB_sZnX18E4mn$sXWy+RMY|aA}$AMZ#NtWHw&Q z9v$Jmr6?mUT|!!4b6HWKUEe$baeiojv$J|Rl`Hjt&uObnV85##3{EsQGc$7@@XGY6 z0YkJkOeS)Gl7gDq&^XIznbKb|U7y;Mou%o?34qgY0F;trb9+q8MDlQ)=y@g?f$2 zCPvEGh@eOhgzoIKTu?MsLH@{V_p|LiYJ!L_ zj)9L-W~Ko;yTMwYm49H^wl)AQ>;T#2)l3uvhfcdgLz@KIH&t!)WfFbn&K&52+Ze#tzHX@}1>s@4lj`Wt-!teY31C zQ7cvw-=4~;wOXj?$<|!983i&PFOQTKxojqvMhbPyo5vwqurvJE!qN?5M)M8}wCcu2 z+5j2SN~5+LC~#ygaOrqddMX&jrb4Sgmt*-!CZWWrPTR+}uz+uGs5W2K!D^wzE?>y& zY=eBf*ntn0lJ1=<I!N{Bi%b~3jr<%&TF{CT zF4(ETc(bwOo7Jt`p*t!|^PE{Tl5woO?NqU<owrUMo>Xp{JYrcbpQbKr zKj(|k|LjOx6WwcMEzAm%ByzV{kM%1k%DFp7P+>A5DAZbX>byHOH9Ru16Q)k>L8V)| zYtwL9m?XvWvmUACirwe*m3ED7HBf<~L-6`-cw^Qfi++tI5I#HZOf(Z(G@hr-PRgfY zVw#JG@QzZCCvsysTu=lD_gGU@k>lj+1- z=`p_oJw`PRf>37xmA)KgT!^@=Oyg!@LrT`Cf-G4X%BLJrsUFP;?5K!^UU_+Wn)X8h8K3x+*o8&Bbw{SpEhelBgCt!Z zq}$6>`5b&E{bstootY9+7L45UsAap__2>JI7e6G#-HA5BPnB&9{G1h;B(n25%GG>jkMw!o_M)eBc#169mcqNiO=Uy{dPQZE!XdP zU=s-_eW4iLkxATY`7+d~QmHQcSV{z(Qz*859mlM5?4i;aSKg<=GB?E#r)9&EZ9BN{ zeR|nQJrn(r;d(d#fHB;Prb3=(cEUEee&nQu>Hmza+bZ;?tH4i*dV4uK)!<2hVTgFX zSkEEoTNPXEF&iCd3RO>XhSX)$ncb+(tWv}93@p0#Z-4ex0%b_g&kg*Prg|YSC#tc8 zzO3WzlISUpvIbU@F*_oE#8CTYwHX;-=$Z>-C1m_k+QzmqloP^bT6evaG=KE|`aves zs(EmuclU`Va@@gLguNbA`TEjYH@-Keyo(6IurAV$`kW~(uXyk6#vDL9$aJr89Fi_N z=XAV!5O{pr&5k^57Z^dfLeQ7m&@@WcZp^*WLCI?Tid`^R`7MS_t2bI(UtB)p5`?+w+Re5p&QF%v&MTR~RWI#r_H(}9!S-k+XdlfnU8WILQd@qi z=VLb;VTR3Zf1V4gBqeM|Mimn|!?W`ls-v2x&vZo~+;6f?=Qv4xxKO&i#^@QzRXLN{ z|9fUU`3CpDmMfR=>49a`bq|9*xvD zC1S^8PY#14O?QXah>Fo1i8bg~@%{ez*ipXxh{k4|{mL@`X?Ft2U zF&*9)Kf`zV*xV41lfzRQH{U%=Ho))`qfT!!P)g@@vVfV`a}147xuKXE>z8%_sOo17 zg%^1=AC(SD*0`x^YLfioma>-Fx!hUy=0xt9Usfu%IPvSeU-Ot`I!GYB3CQ_Gq zWUX+!p_fA^Tywn*OJP!N*9|CaOW_x|?|l4Ep{>Gt@Hi(@^UthiD*GijM=@ST>%~Mz zzt7iMc(m){*O4jP)v*+#xY*Z7!drPj3cp09I7fIy?sA4jAm!-YJWF-!jXoS(^roMP z#}K>AEQOvD+fA`6U?nVm;p8l)pDd}kiCTbn-YvE47#TigM1bLeJfoSDx`U0%my27S zvy`Z>@p4W*BBU1^H}W(Eu)=KDqNqGrudR^@Vs;vjRVrHvyYs;c)Ga_#zJo>0Be?bC z3o?AQTC(j6d^iPWVvpI&TY$Xc{xnH*WZfNEt4F9`e1Xj8?pY(Uejt1`Qa39>q0R#H zk82E6VIX1Y|AZ#i)4k>5U=#MH+~ZJ@Y8K^tZ0lZ|}-HlA4An zJFjQ(@NVvYI0)paH|{aL#FpoAc@i{hy)x}H>jn2A#I@VX$k)TccbY0c-ddiCFVgP$ zF!X6Em#4m;luAc(!jb46)6a>&?Hva~_uiQwHSTwnZdC5s0ypKWbq1dkA^(9|u16_? zUL8#MrmJ&)AzBNjEQmnfecz>dg(~w;`_@9(JmP6{tmgPE6KG`!+1>mcDUt+9F>ma`klA?X9O256Rao`n?26W^!^m2L?-`O%nmF zhIpJ;#JWJBcIR(N5ZE?96R@JqD>uc>bW6=2j9e3n&p+ghHB1v%9}8awWvzuZnBQ&u z?44H!@X}-4L{`kzGWoctk^nPDmUM!Z`D2LdXo1s+DHSZ%H4A*?KO{g11Ug>hwp_AM zewnwW2~e@cy4>y;E;Cwo)~};Ba}t<8I7$eNd_#%Otb%VX~72Bi^cz z21b$Q!C6kPEf>%lh5?BRJ(BC9FHf+=Tkqwl1;4Rul9dBy~uuD<47N*x0R_>H?7=WTa0JCZMC9PWh?Ja zg3H52QOGKVMc^sLS2yrOS=CHaLUqDdpyiYGaUi`smFf&1Nkc`9@!Gbec*lD`KSOx2 z=k7w>22-QbIY5AS6K!EdkJ-CPU++4F3*B`55pWqUp$4tKhNjM@oCI6bohebYK17I5 z=_yO4s?V00pc2_(+_li$Eza!RE%#|y9_bSB9u*MX)n}>uY4Sdf8Z5b~%vd0dAg+vS zmKdcrS9H6~bT%VkAW^AvQQ~bDQ0GG_j*JjzvTdV>1|3OQbSa*N0m-G0rB!OJneLMK z94R{Ft6sUa!cVTh@t}g$3632=U#OY7Ig)UVKx$5E91aRJ@?KF>aWO=Mrq^?%lo0OZ zUniEcVPZ|IyB(f?IUIL{ofc`@)=RyT-WFM>!)E@i48!?-KXGW6XlBnxA?iK)N>~1i zo4j9_L#*9OF-3VGY<0{BqtWJl{T3G_Q;uGLY5|CY=!HDrb7<(1=nRrv)LIvppk?cvd4kZ~nLN2on8msKgHN$IGRTWvR_k@U}HhyD7$j|*rth_#2r{8!i z9P9584!+&=9ay<#)Glke!wL`BUAWyJcsnt$c6U(S_%Xa3*Sny3WamzXW$$D+G6V9- z?fCrhS1LqkrfG&duK#e4Hu-05Y*%(MaK1s%VK-d5D?;KN)+NC_ayC%psr;_!M!01p z7bc<$5YkR;Xi$fdX7-PptL65}LoQi{K5eDWvEb;u@NNCgCk!U>lkcx$zStZQ!aA{< zieB}W5&>#$h^|lmr2*-0-4j*E8TTyLgGuO`dyB>|itL*a$}0q~IsBE9B|-_7=0@>f zy!gU%()e{ZQ9nR`0?A}VwzBXe$_vb`VL}Hw5BrmNb>H`hT`$N3+8=Sz<)j0J`y1@$ z)L=}YGf&+Z1Y$aEPOCHc<(kut6@8*yZ?g*#(8MgviLSfGn`yO%%XmZJL>B;aET937 z)pMlIyWmGJY)yq=Q3c*k4)dt!Z_a8QJmO9K3b&m?G1i%#uIL2eGK`lV%#qjarPq7u zz=<%ybGt3YV$|p@OxFhW2xq;pcvyGd&9@w?-N`b9<1ZQ1PQ!Y$7B&Oi?To&AhAqQ| zMQncp`y;YWB5CO?UQ%X?E9clUJj>pDDg}5Z&{59CJSqxwcUsb#yJPB5q|*2gG+`D( zRcXkzRXE$@mdB(!HI+thcsBT3@t?oB+RD_R|D1t&;ClXMW0uPDy12yi6#1tn^S zT8mJiF6Y&C)9SI_?aZz|P+1dC?!}NX_cjS;Z$G^U&*)fX+bpcm2N)>ea)5KIP@j^8N^NgO={sKNn?|?Q@Ab}O65C7s53C0bnv?BNA$1^x%Z)(*%mIKHmu=BGM ziN|sp-|G);BcJ+;`JJ*}j&@QD^7p5W%nW9PnLH=kWDHgSQOd9@)~cA$+K24Lo?PbMk9Bkj~!x865HBR!lg)s}B3+;^0 z7Q<+Hb8Zz};NdCb6*sil(a4166xx0 zJ57;rSV1H-kve!@eMRkgjX*7Ehj2}&SZ+wrlcr$&TEz`iH~CEZVT@Uuk!ji}$t zd3PuKu0*w5K9S}7c>G6SUkcxxtgI)dGAirCV@EfbQ|S#ZGd6eo3`G2n=}gIulY&Rx zIiA{DWtP@9jc18h@Y*f6SDgcsLVB)w!ZVUa*tGqKcv!L`+a)-VhIy2=i*v-lwn{I>UJ?Hg?JN3EnE5-NL}RWLu}e_ zLpb?D-&q&}ayul+K`Zab)m;g2S5iP&50Iuqblzj@ed14FeV3BbbtCwuATt|dE7X~3 zoM%AUCa3o6mriN2?V9;#U(rSjG|cioJzB6DuIp;u5Bm-prI#;U=RjQt`3o4_r|tfG{9%U%;q zIU)Gvq>x0wy_eTzb0ZACQk}J$9L~=av4jno_Gb4NUcuHgB~`E;zPT)gt7%== zSsdG5fQh6T$091pXvD4NL&%W_i#Id{YnQGnm2fz)@Hjtrc-z$F+{TJb>*d@5ryCDX zQHc{`BHt6%&dLB_Gz#*aEM4le72;43=v*s0{p94QpG@#f`XJfwa61r`n5^=S5je5Y zFG;(BMcgoaI3auoDH3qrsVcW}Q3sdu1~ml=z5)XhW<@IF@f(S0}yMg%qc}Jkiy$|qSJpBg&VlciA?}Oj*r!FKv zzIWJ*NR8FcXiNSZRS)I`kO6ToL^GlP#SDZ&PXVA|1%XiW&+^O?i z{U`1pi2;BR_V8hh|BG9|`DMt%1qdsWiA>*z#{L-(y9)^rquN*E$)Cyi`uP+4C07}{JptFhpAN>Mp16p3v-YWiE z_`e6k_7y&O4(O|EI79uvaL9X2H0$^?7JJ3lU-ifCE8Fe* zY_X;NfW|R=9JXF)7+LF2G+7@=!%E3l$}aI*b5IBj4@fC1bG#a3Nh{x;DAG3Mc^yt% zzAB?W`A9uAOJQODbm!=90H&`XSKcs(a_Mz@zr_wi01dmQkg5K&l~=wrJ&4&mLcc9c zsohPfoPW6Lb6$+HdvtfJ>2p1uZ!-;0U|&t7-gcis#Cz5%j8AIbBo4O!NNN29++0BW zV?}qL;^Zt_yGB;cosqZfO1%Op~w#N%9QD8p5@J1sl z>DV98{;G7QKWLN<0wY0}O-t+_8zMZ5IG@DYmqWs5E&Gk%07gK<+hDWwVX(nDk<;Qm zP%@@;imbo5aM$DG?e;Xlt`Y5|3^Zg`o>-0xdCH??e1iOpcE+jeK&Mye{wbS=w!QUs7q4yW^fkXE-TVbm{#L zUSR& zyaqCzp(>A1?k$HuNcc^&f4VHmAAM|-TP#=Cs|>=LUtf5Z?(6vg<>c%o!V@M9OXY-7--^zuh03j|xl!x<9MapVJ7N%MahW zLPrFF&v|bHe?zO>Vrom(!t*x%_TszOg;1lR=qId~*$kQ`ANrE`g!W1tlHSPR;~f`; zgjl`FlyK56aZT{rDOS(|{=QK--7ZJ?&^t1J_P|bl)2Qcl;pF40Qbgh?f{Onc`ySGs zMFS+*Lv-5uV@&{)G*e++c}g-pMU#5{Md57C!sZHsaFyb?y?2t>>;WT#4cu(QSWL9l z^mOVxFG{>lyK@SR@}F=zbr^1M`V(xO_`1&sXLY$o#?k)xiPdnFS-|W-dT+$j5kHf^ zNw3PoygxMkTv25`SPZOgP0}vQd}^DLicFp%#<~ER7`J^0nVsQf`RdA+t(Rd3cnLIS z&Xx;iK&!63ZJ#}tMraHx-ERg5$TkP{Eq0aiO5W2yqcWn^okEFjwKj*di(G9|#08JbnXh*XH_er@Uc!H$ zNPzij;ifM``#pN(BH}S`LlenefY}J!5bAr(jTJtd`lL;*6%lv$HICb;2?qzyi$i>m zHIv3;O`nTx6IOi1QsakL8=a(BH$F?aWG~x=yMn|zsJf)|hVPFH`&a2=K=+RkhiJdD zKWd5Rg@DhXW?MR|S>!YjK?-J0S(!(+eYuwb#5%9-{p!>rPoOP#9;ZH~R-L^BWYb5D z7ortrRj7YCZ0+&Fisz}l7?+6ny%@o<@V~t{U*S~TE@tk9T@^Ke|9FNR@M!9Z-QFw@ zCc+fX62)Iz6AuN!6&3H3Viwl@b|J=*j7^9jeouZ+2Wi5UjD<|t>8T{oE#nDD5(~@MT z{r!fI5GIOpG?kR381%o^>DD^;DHY}LPV^!k^f3~P3N!qrzKh84NZ2a9a7?&>Th@?? zBL#4NhD}G*?4zr>kwjB;;# z?l&6n1#A>Rb~|ATwEu{nUIpX{znPVY{o^A2Yuu$@7lcQW&;Khr9vAS!!84zS@2`-5 z%K&(b#DMH=@IlZ2EBcZI@WOUPikJ5nxqoII`*Q#w`~SaKf6dwdm&JPX7V$ec-$)=4 zvohlBCR~+HJ+o;mn*tRN`4@BP7f=omfL1+iDjbs-S{f%~W!X`g zK5fq8aNTL~dN3KyQLvPE8IuqcNCziVp~L-i=vksYr;00!rV6>0Y=8rDv zswxSefs1PlWA_bgFt3kkZlc!~u+os&r1}Ajyp7nK5~H3KaYZCGiA?H~-RldC3hNW< zTJPN&ChaolH@Q^JwE(=r#=5`mB1=-o`6&m#^v>Sjf)D; z<2Z`Onw`@oP1TF^8~U`m9{HW4mF*5rj~C3o1=38hjJAgBMNRej5}V`s&bC?ViFoxP z?)X4H_Xjy|gG60!pCMjH>+LWMqvR!@mHn66^`0pteC{o^Q|h@I&C_KzMv&>UAMHo+ z)tYBsU+w)#vI#h>zof-Acngf3?#RXt5C zDC)xeSVeaEwrjlEw^N`MT!m-%)_%_|&(2h3K8M+O7c`#51jwl@F3;zj1PWReI8;6w zuQO6kyDg4TD8ZUfGU`j@sxasHJ{_9pP!Q#?OU7N0W~EaJ^gS@&enKB={r zzw%s7UKd{wsV}gupQg5;TQWHWQtWj-7Gu3jz^&wR!HJ~0(cDy>x{ zLejV*VrD2iRr0RZYWfF%>MRmDoXVATz&Pd15ji3dW&xWq5BU{C5?AEqP3HS zNnmavV!c20=xl_qNa4r~XA_GQA&7nY82`6c*?vMRgGClUa$Gw9WYx2lB*bZxqqzzN zJM~~mYqU$pS?7HD;zs7NZ?K25Q!Kl=X5}-pX6ZAcr+s0ORP*zr>;$*3us2SX>(2}p zyCvJ{&z+)5e9SRDcADmb7L3Q=MB}S>p5Cz6NM~@nVUxn2@VYMc<9>*D;9x z%wG3^Nk4Ba{$U>r8UGUKS3BOyb;C zt{hx4?YTDk|FHL#VNrM8zc@1tND2Z9NR6n3N(ds-gS2!A(nvQ-H#49hC?VY)Dk#@b){9 zggpDUB=5<^+|J1=MXwS{cx+neSB72X>e5 zi=vYob1G#`mmFN#+vy;6dF;^MHz zNv<(EwFzrAiTg!an5F*%JtZr=f^&gOi$*HSKxLdQ;4$&~^~QJIsIN}}yKD_-RwIu` zg`>U=d>bx37DYbZI_UfPp5OcZZ;?>@nXD}e&bOiLyej$ccfv~;O_vh?I*W7rkVfd{A+t;f6U0ks^)AhA=|Cl zet$>%qyb1OnlCMx6x%d2^SCa4ILErH&(>(h#;jzDb0#VrRZ!6!X0J?5zX2gnPxX$r zK%Vp7^5{^_-pi>NL9gSn(T?_2hb~lW{8j+$aRmL&QPp4^GT}5#UFgy>2WlhS+D5}-QSmD> zM55=Sg*oh*`l&>Q$*h_8ry>#n28l&USNWimM85<3cy^oum)R6=#Ob#%Ms@D7r)J~` zJZvIzQOxUWGh=s!;3W0u>?VKga(^* zl0MRvipCOI(J&<|c$-{6DFWE|td^0x*hKRW@`%ho$RmX_%`%6fu#N4C0|iHVKqpa9}8V^C4CuPuQCUj zviCHzT?_V7l(rX0-2u5&IIO*8E((i9Bo?3!*ah-8JQl?4e>Y%fYE~$FKU|G>%Ngf; z8?l1BKmhS$K;WCr?N2sUB)8*NxOplIC29Vh2t^H#nAQO+s0+yr|nqwjTQz$IO~EQ6XYL?&Bxb2Mi_m_Y36a~SA2Zt zC72jVz&deR35ettcKLugRjlx?}2{4&DX6i^*{kv1 z`Ejx?$0zOQl1^^2e-|uYEmlCTZZd#c7MR3w!U}z;J;-A5?6%0S zFa&IF1S$7c=Iu|pUOT#1*O-U4vt#Vv5e?+1w=2Zb3sw^4$f-367j%8we=+Ig_H8EY zb(S0@f*aV_Yc~XJHrV+eITlv?5(zdJ6pOQ`kPgokX(9!+xn-LF6~kh`A`J(zc=)?f za*{Q)kK2o_@TM$AZRp#B-o}{6%1WnOtU|JcRGR(sZJ?U5w-;FGtWigW}nW1#$qi;NMgmhX&uiL{bw9B}QIwA@h9CkI9$%EmY%*nx$B-zAIfS@A8%bBT!4$9m4wyp zKJm(o>UzmcrjrU`uTfjjZf5NGykQ&h+jb-M`(O{ot$ry>R-12ogTXdNETtT0c9q~< zBi3kR|1!@(i7l)7N%CB{3!7An&Ud|IODjl!v|~s3!_K-_CGHyDK00O39iz^<4s{a% z*FiseB0T1lumn>e$ZS0uq!4VU=@za#^wo@qVoeuKoUu}qYjrexWFFwoMS<78%6e(w zp*MoMgReQ=S}fOU{;~oT`Ppf|+dpVbR9F&-3;i;T^Kv4Sc+A0k=Iq*18o+TlNqp^A z#n~9uN8Zj}_zwEVifWu@in}8-S)?`Lje5Fa(|+Oe&<-M^12Qs#(xeETTWwTONd9>0%=VYfRT*g1yUa;Uup->b{& zsnVkmxFj|4z1z%~cF{4tbl!0@-)f)8sc*@6iz}s2A_E~-Ot<~|+y$e- zbj>Z*m=6U^tf<>gjNcWIX-bSAVhmdWM#CV+!pd)wUCT}k?XL0s#!S~~&afiO4$<6A znKy|2!a|*~@K1J|)$tT&rA3q4b9O5utqUrfbxxdvUS{FSh=bYd0`9ur6~$moW=%3a z$MJJUgF+T3ns`rybxA)4r;?N>%uqUEBfv#nn>0+jt%rz{Y_GN!=Hpd~c2A8sp(lg?>DI5er{n8!E-3*3tAU)JBmwrAFxhuBjhlzd%fCr% zrpBSmUHzFulq_|g+H8}uC@%R+{RTNDcZ^4&`u(bHHU7`_lDY3TH_(6D7Rz;uJ()kA z{E~7kn|w}bpaUok!maDF@257(6L#l6RHHkOzVl5%*@dN`dHHeNaXJun6V#FP5A?+S z#zcgX8Ty&=iTiprHcbuOS2h;S|2{E}I45t?oWgI#$-9uE1Ui&5h z+jO{Rd^Z1!wy+1B+I*me=4?~zadt=^vsL%#IO-#5-caMU!#W3#Rl}&|;bR(^{W&}e z)=|+<)Rt#^ak(SO7?P`f35S&|cjK+kwth%*t>=L60^3T40V>WGuZx9l0EHkX)@(3b zYbia^PStm?xg8#)E{|5V(is`?@&x2D!I&B~zE|+|k+Mq|NBol!rO#{k8GDg5CV6;l zt$NQ9ayJQCse9D%^lPz5`i!0WX$DBigzokkefaHG)kml0%)r3j6-5!Cae&fORoP?*;e+e)rW< zyN!{7s^$tG51U$YQb#$z_u~$-@w{`i@UA^fkmlLUl)IV7*;vh^c;S?Kh9~nG;C?u#n*G@sGpao%dr!UGBNaZ%Mkwcm~r z3H=k1Be??xirVC}oQ!!4tCBM0y-oqnhZa-mLymOB6eG13uf@3wyD$58l^1m^WNRKj z9khbc2zc%99(epNxkf;pAJu1ctlI33I)0Tq$tN{;dc07*s-b8Vo(VS{D>f94W|m@q zE@E72`~U(4qO8T2AFV$RsT0!ivvpD*52L$Roq2Le>fP2~f7AE1I*3T86?TV;EE?ki zy90KrIheM>?A^X;RYf~^y(Fy55=MM^!4okOLeDnI68ExvbBDupaI>yJX?^2#H)}_H z(MrGx>9MNq;nCRet1;q~GM=}uI@#P~_p_`acQXiZa0njhM1?uO!Ir9dxq@*Y0&08= zugbo)slOGUC0qaa#p3LQe2NDa+WQ$Ry67N-Z3I z4ofnaLw28jkX~Wu{feVLW-Qx73*fHF99o5!r2x?oy!YPhw#ctcvENm2EYwxiyX}v& z@CRm0ECtY3IStWZ_bYspFr>B#KPJoRqhYZTu)lAV-H2`Sh>E-J`h{nP3StooK?O0R zEiaQ5Kd?!f$iTnW^e$-|LB^1EVLZ?zTad8>`4tc0>g50g{0zV?OY#4KabEt4Ee=4F z;OpF)-v59yFMib_oTmN4BGB0HEfy4t>JzUk)RF@nB0IN6g#q2H+UxYAyP^?C;LZ8Cg za$}}%famF-7zlEc9qkg%CNTsfb9x<=X~jKl*>89U|3${&1a*|AYRNK< z`=?%XDQNS5l~q|B@T`fYL*al#)}9Al~N1M7*Q4=l;7& z=^tB2JiHO~gQkOlkPN-h-@fYb^M_n9AT2W^{NUdf$B=q{o}I4Zr2A9}bMN^Bw(=&S zw_1UG2b>4=Z-d^BgSpp;K%?c+^kDn-(Xl9lod0IlSO&rlvw)41W|p~m=GNClK?50Q zh-q@dpq9!;Xi(HOv46`kS^~JQyJeyn4{fR}gCURO8>-7Q`5okh+1z zRY2{t&-NP&k7>q4XRwfPAvZCK5)&a@gTD9|P3z(t2v=IDf$P9F#~@JDvZkYJ4*XfO zmXJ_lc2ExwT*CGX@5*0XHEH$+q81C90)*jLrzY_dgc3j%ApgEkUm7L~`UFzl zDN|H&T$_%gm94sZ#ie^7`hXS>dT*8Y7>lI(agxZtt^#w9D?ojNPTf#EpNRg`T-!J?j1}M%i3Su?Lf4&aTx)uk(*>6)lzWERTf91w# z08M;vC1rf$zuEzv03wfBvee46f&Cx&OT#gIQM(!T#>oG$z9D=EGX>r4AyV zKL@&ZhaBrN5lScy0h2OoNWHVXB$+Y%h7dKvJAx6k?kT;l|JHLj%o4~Jw%~JY|F;PI zo&h2daYZxX|8Eg+0{xuzzU_3?@xQ&4NGvcCwEXZV{{u~u0uZ;gkU#H#4-)M;P4Wf} zEc%M#^!Fs&f{&OT*QfnEr`lo#E^6Z}biNs*RyON*qAHPv$>$eiw-`PtAxZMItPTfve!bA+3`IQ4-hr3O~taNy~ zvPv?CSVZW4kpW_cX+U1F5t7moU8$X%97G>a{NZO^qa3Q@&?h8up1)=IM10`!xO_Cu zGP{+3Sbx@%&=Ccjm@wDBIt#VJHjVtiIRC8zug^2kdHz^ye>;}OSeP{% zOZkI&A*lnvC~2R=WoKyFI!wMAXcUTx+FR`VqsZSlH+pYHZ$WRJc z=2q^0>p;GdNpG@S?`@@ao)t73s?J`UDCjyxF4T;Sa;&&UQ}b1fiN!neDmqi8rlpOj zRPQD*f6`kOxbE(xsx(^%8#s#Bjb2#0lWwxEk~zSxCA4iY}1V zW665{-me;yF>Rxoz43G-qGrFj3w5fxGEt%OWxv0F#s1_F`*?n++JIuvYInh?Bj$D3 zLzU}k5)u;MHF+FHj08}j=j6A9hC4i@hU+iICS59S!tA7KSm% z|IoG^_!fp=;}VHlh^Vo&S@?^hDizel*Iw)qcISo?tko?Qwk5L(_xWSwEN)^O0uxPvq{R z73vr-Ka){qEq+Tc+jgm!e`28~C}|jVyihY6+VB3 zU7tssY+1DKL7)e;?=072kD^@v+CI(h7|UrhY2{;`gY}91;d+`PBagjUfwSrQ5UKS; ztEw&g^pgi0Rg8*m(ZE)vat_zsPTiEjJbiDl$~CfS}ogVEiD+>MYP5DMazaF|lZ)dy=jOW%)(c1GR(u zbgk+DwUudAt=|#DS@M>+iF@v(Q7)Q=e~-ppdlKzqcn3hP0vOmM-}Rb>3f8BN<_}zN zGj0U(ZblAds|L4Y@SzW{8>O5i|Nph9gRxIy3_3p(s6|kF;25#cLD?7a5?=>onYrK7xU@G zhzBX0u+tckp`UJ62)yq-gq)oFRJrrWjs$A{?TQ`^-N2^gb!;JhkPDBmay}$q+;Cqx ze*BQzW4%&Vk^AQwKX1t3uGt7YbcW4|Ro*eJ?ecgJ{YZ_KM|!FcWjnx6JxU`4?6K2!&HlQ+6qg&a zaNvu4{lkTrr;{tLI4?$lUEdm1My`_yd^?I(?>>X{&rVV95_P22hoLpf-*ubv zgIi98+jlm!+hWDqC3CdOmJ6O*w~I6sDlI8|7MZZ7G$D}#lVh=Bj(hqM15-2Te9J>^ z?boYXOh94c#F4#h^kp&O&G04*R--+}YOl$EViAk#bp}nn!_rGN>$Oho5Af+kvP~Exb3P`eb7=>}*eE3Qxu<@B=`&2DU=p z&4oxXEp!kvMw3UJ2s8^T3_0Bw?=0tk*N#yV`dlH`ov);lb^{M=rkuOiP8NUiGsC>@ z_MWJRd9)i^Z!^9UIPiT_x4W(kttD_ttm{ z3upvTiWHAcOAm++TT*CtmI`f!sL9zdd5?U%-tm&2E;WgJzc(hbil=ek5aX{1kCJja zqpCRz$vERt`&<(H@d=I&UMa>6`g05j1_w4vpmD#vie!R-4tvaEA2+hs6vpw`tBvFv zSZoH>?Kkn{M$saDK6aWJ7E2B>cfNVkQ@vTnG8)BBUeG33?qRc1o^UuUmIWNpe5#iB z4mQN1UA-EDnrZYH%@e$vrJai?9_lccOro?u=uX6{L4C(%`?7b$MycN)^W9qmD3jRW zh#KIOT03nZmL0X};@{XA=iHiQ=nulh-}q#eH`D5FdLP906DfFR{Nak{#_%JJ*e+%2S>bDEFaHV5UZHe_SszeZ2L zYKs5bH?{ZpR=nd0ad8j1Y;r4Hkh~6~k;}2bs~5w{=tDT5l=rsgolXO5woVp0;;gvv zYss?RJI@yhrHX)&k27xf<6g!Yuy`WJrG(t!`Vi`!x>J%;nKs)zOTI6QWmCLXI9Lqw zaDI&Xk0fC?7#eUoNpl=AbJ<;g<$4ep#LReZNpSN7q7QskIc1!OYl#5z&^}xxMeVsX z`ERM0p{95&lToV~0td~=y98i~z1st*r-wW8Mv)9JzlAc{BM5KiLW4rh7$-NAkYJPk zFG*AQ;c}3L8sy$5D8d6-ukEQg`60=x1zC9Sd9up=`4Wgl;HQ^wCYWe~S{Cm(%^RJW z37ky!@k8~q)rwX>H&YiA@W(D!VYzyK5NHKXMvyW!HBEnr$n;1L;k+IkJkzz)N-yv$ zf{8(^Zm0&?VcaOfM+zbQxruie!YL(!`-F^xo&G#-pCv}bOwk@Gff}!r3Jq~`FHvQc z@lbpENA}3>(uUDbwss6YjQ~pi-$Tce*kQ+qtR+nX3bO{AsEw2E1oz)FG+UOdg3)W1 zK$`4#>amyNEy&?mp1YR^dR>jyQ3X(C2;YiQF4=5|on$N1GZPpPB z3P%G2Kj8Ej#g-$6x0{h^EuV-xr&G*e> ziNT{$9>}-ZHd>xTO;xmfG!DEkH`G%iYLy1|z? z{dLv0&&_<71FatBr!!S(%cSf3sxlj5=Gq#m87q6(3U*+onA%sCYAd7c`4ErguK*0adKoA8Xmq2NeBxn ziO}S79{MJwZ$;BvQL~#QcIr-jN9lE9W6%^V=l2?rA=bYLfeVwqNx0DOZlKOk9FJIp0+2Yn-`O+rV81vK4@7f1Bd~?%vUQH4Uy4}3ooL;w$oi`>9=K8-X z(itb#-IjK9-sh|y{h0i`aeGkj4sN6k2Ssk>;?xJait4KJ1CEeeYHx2Y@N;*0nYBf= z_sj^>$C!8LA&!QU4YmU*``sSC%!yp>gFInK&$NuE;;8)Gh-QCd49?GcSufI+93kPT%@-^}@TJ&Qk^5=BMqeIgwO};Zd zQ8cdnerEDkN|suPeoXRPW|iBgY-O~Wu%6rz8saK5p{clE*Oo*1)b0E7WR+UNZgSs) zdLT}c>#4@2kq2^la)Jx|G45kJ&O-f}Yh>ZmYBe!gzNC73y3%h%Blez+O3tipSw&-$ z?@3@yM&lSU-lYg&-Wyz&XMwP#!W)Mxk#k!IfOp`)!xKNL^Fs2uDM_zM^&aiR|lvqDFS^g?j5Scj$r zAlYHLCJ(T)3V)_u#G|s*z}G*;1q0vlK{UD~6Bb$D$axgJ@Uera%0BFNBDWP6S-Ccr zKI~Fg6ubK>-3R9U#YRe-G|>m1tNpr@;Bq_w0}%mPkR|ZAFgNm&o1}EAB!)bp3$uOR z+)-okR5s{~a@Ds6;pP_QmDU?p0dDP?ULc7$+?8`na==);Go3?n^v9|QH}8O~*6zqX z^^hW?+n>C}5F< zuv&GD0Fl**yu{fA&5i86bS_bImKI27$ptDNX#B-Z#h;?9yo&R$D69O0FPY$nyla z={N4;!Sfm#Bwx=4?w@D0K-4^rRo1IMfBZEX_$%$3$J{UMV@IJ!Km5d#Hh%}-SNyKT zb^SeUkUR#nuvp}4@Gf4<7nLCAyVHDN2&8gBO^*1&!w|l?(cN?%pPH2L){S$IL%5lT zCWOh0_u}nkgDzAE_>evXXes%x2l8Tsj5S5Lx338B!S8#zRF z!ha<9_mA3opcVhyK7!SW6_y%s`PFF;@LxiBHhv}(&n{XkGX3249p%Si7W$Oy6}srBHK5NTuc;p$ z-k;wE?9U5lQ+ol=`PY}Hmyh1Ng@xNr8&#nHnLR_%O5sPhR1%wO18Z64s;2ewtVGmN zg)*tufi}(1zCRu1;uLve;B`O&Lf=v?pVH>?rOn2@4ma zwlE4Ou0E-rAqc7Islx9_2cpBa40e9mNT$ZFMoE&8NjH}$ULe>Dd7Ppp%?8w~sewvw zn9-h{>;yjfGWM*MH|vj*iVFaGv1HWs`NA4twL)NE(?LW@b6`l8YQ41Ys5ro`{alHf zubAI?N6;(@tRQ2>KKlBGhtSKm<|Wk|j18_Cq7KZI#R2dk&WTyG{GQ9fYM9o<@e5($ zcdEJmdm@talI!DT_qn;%EUU}DD1FnAnD0`DrfA)^*fth&-R!3Cu-4FsC1FxiG9CPO zr^;nFOj_WKR&5Aa2yq97yt(u2-YaVJ5#C>>JE|+$AVdt#)V+cQH^!*v>|8r9bLFMm zUezJK=lT=li}QA$xGy{%dY`Wh)m!KkNCS>rFtqaxZp`I^0=rSM7fzRhlwgU#iluk; z^jLVM62Pg39*lQy2jBeo=uAVQNMYNX(-uxKepJs$JQpEd)0Rp=N3W^?Z1ByliSp|$ zP~W&$Qa}V~o46#RA!(tIm%I7g2VN*2bhB<2n~v;_&KYQ=!XbCI<#YY`d#lOcAVue( zfuNz(vB6w*tBLOcL7o`6ZT(A%_pBx>19(-|TXIn*aRYCr?!6s6+g(3a(e8ROwwJb6 zZz091Q}68$6lazGs+t`DfKu|&d~Xuod3#5@x*@r4I|EQ);q7dW*|D2b5-=FNaAWgB z0j1`lbWRpOhiCIOcbwNSh)8Cu4ho-;b*HFsu4o?!FQl*7#?0S2H~vZJbk!Nh9wycD zm1osrkxN^iNJls#cTG$tlC!^Ve0j_w0TgDQ>pYua#I4p^S3J|{UO+8Yh#cnUn(9-Oo z#wDJNGj`@(J(=lpY%UEake4To6@mtj=6>g(Oi~ZbQCU`dS*>0*jz=!%wY5>Bho1(N zabVt!&VF^8&D&q>pQTndx;xzRncb)Itj@8lsGn(Y(-%5Y<545!x`EFAXl6$>f!hU< zHMJWeEGo+FL(6;cyPyf?;=4%r9$24<15DTKc<7*=*(j|%$HdouyR-oHkgVVaq!Ws{vyEMaojGzls!E?wrUNb z4>XD07(T6d8_VaAjt$qSc2&Jj{mj^#pSSn)wT-b$Y12F3jW&ytDMO5xogcJPi0;2= zY3X561tH4w^~SE3?J47vK-$(dE3)-sIgYEj?LT3c8`sWsCkYvf3)X<#I*K>nG_w*7 z?ev)~%V<97PIlr5d*GId&5HqPARsIaH)A;?g}i}a*j8a;VPOtGue=L~mz>X7;SUA* zE|y!c>2k>KIRTlvmu!P2cW%jAk#XHw1PU6rqDs%_y9%>p9wk|rli zOAWr$(Pf$Kv0~xk62}GBR*wPovc^xnn4;SO1@EA8u*h>q&f%G8)O@wc0WlpOMTkCI zxc92c)GpcgTG98cy+IxLzJ7Pb?C7+nqk%4rLT@_p)Uc&arC@$EQ>y+68(Z6vDrupL zXS%DhmG?V|jZiaZvsgWu_wUB|P)8+aW9HsWwkunuCx`X2+b0DqFGA7}qIKYyPj>@Q zPMO~m*o|bOY8-R0l^64Hq~BXU(4S18+KlGq+x4huI4jKD@wpdWu~W2cf8PA8 zRF<49DT7Vp9QRtDdy}@8WYZBs@e#zOr$At zMzOjlk7S5sUYiL^a`<5v?!K12nbdyIZuGEdLt~6bzioA~k??g#;B@qQo-;m4G2I#y{DEjuF*^e3ha3L0MQp7$T&tlBPZ-zProo*DUS+(T>FL;d`9F3m|l zwBVURuae8l^2#y0fr`}}#?jmSi4tCv(M{Dz@5LBul+Us5r?TrEeM^E zXT)?Ow15uyHyQr%OzYFZ$evJlm|KoA>4eeAu~iJbYkul!b4=imqRQ0pHEakKA9{Ku zOi6(lpRS1IJPJY-5*==++{+GVd3$M?XdnWd#k>2q+a)5B3o#0NmdNe2fQru1`x6{g z0`}vd4u-rF?dfx;rm2TI3{_poD_MWvFBo6?emz~iBF-d&!%|Z*^C{EXOI_K`5{s3m zI}pz;(Bd!{>L?-z`SQM{+{&|Iio$$F?>Y#&8 zjW{JMWOG8x%tUz$?%jj}!xHV8SiD@U+{0*|@Y&nn?*@dRHCZRG->*(qsT8(X(;a_BSIm$K0mubyZXMCl z-?u;r5M9mWoilx8hW*V{U}tZ-`5I0x@n%`N1TlR(&qmdK?#d*g=6%B)bPKtS(IQEi z6T2T#cHKdU^rWgcb+ilR{c7F5xMHdyXBJb{Lfq<_C&cLn?aD30Ue}@%eJ8g{7Tj9G zfkQa6O2W%u`rWKhtuFR_Fjiv0Nlajh87C~)H_7Lc1{_eqX46TUZ`ZEuv{m-mFuws< z)ZcQ$L7eg#6ZroS!HGH9|H!wM$dB>+?#k^_#fUqyd?xk+cE3Y8=o<cNx-brkfi@@vK-;w>El}dsKem;u6#mblAPq+(>(|fe!(`1aKi1aW ziz|@Lnq@XLMiDM^1)2fZodDn~S>ET#VySDD3^(PEVn2VI{uGy?c6=Ob$*NqQ-YU(9Nqixg$3R0LX9KIFJ_+)bU ztnk^qKV?e5p8osP^3gE^%Odct+)wTur;&@A6v%bnK{?HQqjt;N`@T43`OkeIvn)Z{ z0cs$KLx|QHurTF57}*`e+!?NSE~2U@T176&VYuBEO5Hmkv%B-cS}icEU+n(!EMn9~ zZ!6gtyA5CFbldf{~G{evvQ z&ZNWYo*=(z?i)JgNgDMC$uQ>oY1tmFX+H-EeM5tJ>d+>%HDxq;!xZGx=^Kk6=!L9R zr;KZQLo7zjD{p;1&PShUa`Rcz1w7J9j_l9$2)uRoVU4=Mq|TkntS8r3ofV*@9xmCo4-b8^!=nH95sXWb;fzLMGpw*2eBtb1C6cq z$0j5!eJrqlfij)HS`@ax`OINIwktO99O`1D3v;&Hc<7%^Px-`bQtI@ehDW-~6Mu!X zt@OLq{b+r*uPD`X2V~>3POg|=o#|Y!R zCyjU%<>!>Dfg=b_!O{2-z*-MkBoC6$h3<*gx?o^oIc^js8D2blSOkCSZj5%cmw-ke zc#fR~wSn=-#SL`){=z#hC&pTe^CxFsCdphrnFur2h=o9a8uw9eU;O!7^}WQJm&lko z0$(^>K*ON(9905P7Z$<~1%_e-IM=s5qvbFzx*1H`J@o+9vF#%atEI_dTljGPZ8)41 ztD(sch+mxrrN4kLoLejIqE*#sl|o-&RUOfZoaQLFQp|%@$DjX zgz17%{=NUxn}FL8D-opnS(oU4k4Y$!{`b7r)W{qy;F23|LqLm;6D2HGTMs=(;QZ1sAFKx9<1SXvkfao>_NAA><*)$3y zVm+$Pz;eTMa^`Cnut6<80H>8Agl}EmcHV+N1L7)uZe^PCo!2h^a5+q9AWk4?`+raT zQ!4*YR{uw6$z~QzPA(CGx$PmuoB6`zj|F0B#GutQ49$yJUMhGg2+>;-5nL3b+!`+M zRHA*b!zwX#cVw@#Vm&C(&=vYIqASECK`Qt~*uzv=k!KGs?3EgT)u%zieplvk0Eu_m zB5(bCw*olzN~zfDc%mvUCpc5OLOI`Vfn^&Rlad>})AV#GOH~-1 zX#d2*AB@D`DEsa?Lh(n55Vim~9VD|!dFw)_JV;7+JRBnXEcakMM>7X;)OT8fxe7Q= zps*hW20P+TtvA_zzkhAmx5~2cqKxh3Y~TaE9Dz6hie{VhRj5?p*nEJ**No%zU|KaM~|dVt2T2# zk?)lYi;1<5S$*PkIbZU15Vd$;xLpTqh>MEd7yJuhVjX^~9b_7to7WY&4~4=`$II0B zH+UHCzql3AWhfI86hwHma3ZjMFlCgcw4|Q4YmtU0^lI?YJSt_0CQ|1|b=C3xm;vI% z=V{j4ZNu{0KR(D?FZVOHCI}=5i)aR+9}}p}tvghVn|;qwPm!-&Em|35OCX@(rnKUt zxsXQ-19+FPg+ay7`NFFP)nUX_pJV|HIMv_3f6pe}S~D04zR70Xf<}xtobME)l&Pc= z{nS;i+t5wO6U>bF_e z7lZv)YljM4i1}Zch1x(2@V(mDs!5cC^y9~m=Tj!8JvOU-^p``)t+yK_%*5-Mws#A> zIyBI8j!T&<)N@w&*B_a6p;7%#lqLJ{;X~+k0tY)I&h3j(Y&z^bskNOi$9Q2^Cqg>M z#Z>dc*YKA%cz>x|GMPw|y&88N#B@Y>VhC@v7~v8`I2T2Wqei7Wy3yl|65)0c{GG^z zbi&MQh`4daq0W_cm-$Cg20i&@3e~z?>_dVfNP?Cn#f*uGIM)kPc8sg_sEO55;dpSq zNI7puH8bn$6QAUeodLkm!rf}*PLV6`LLYiWIDJZVTOWOx*gJ9ar>2t|ZJnyw@4BEM z9kWtcY%ITf+1=7f(tbE9(=$JGtaFFdakcaxaxU60&q6a%8Z zk_|ee&Xcv1pO@%wjT-lIy_~v$hEx?Rl*yB><8u96znP*6I{t z-4(}Y#W$f7!8k5Vq?N zk|5DrX11FzWU9rz@$OmGUo9fUg+LDF<7Ydv?>?MXc!%~rJIflZgp^EZagX%cz5<^o zQ~^h-eG~AhnJVEsmkA6+CeDf73CmX}hbsM@`>sgAeDaY?^S~qlJS2$a_RGtaI%0*m zJA)v!GVR^!ltZ^_?MT5wZ2O~2&6EZIyfZDU5@RFsXDu`B2j}d(Q`>lv`m0UA=+(Op zif5bd@^~A|b1A4xcmY0z1NhA`$u8eZyS!90tiu8{VEI;!{J;AFSe`%v??S+3r!i-8DoBJU$OIfRQqJL})CM?&axq@7}!z9J~(=4Tmana5(JtS>4-bmu^B6 z`s|sUuCCs!?yQJM#`MXboL{dh488YzgznsST6-{~4IOr|CwfoMDRHXdb%gCH`t0U~ z{U3(F_wiWk;2zkRhkbSn8o;xNPtZ3#NQ>Sn22}QT1hCfE&Uv!n`=^I6ZrsIK5Vtn` zl!p1Tfs+i0HS$RHv>MgZAS|rI`j_*Rml=34C=MpKfcGa?41`P_;LE#CW}l#bDOg`- zx*bb^%!99Qz}ET;1)e`0s`+1q8H*_xPSJ2u4Xlg6At)Jss(jQw^9CC$^)RDW^g+c6YV^u3Z1mLX!o&GzxH*6_LPVQ|qezmBB+m;} zF27@}6)XgYqj_906fV3EK=xn`8kM~v{(u3-3Y}}e9lLEM}Y_0l(b;2eiFkCDSM z|C`}IChg^5ff;A3!d+Ipd`)hzAP1_~@eq4C+Kt zeCdm>-aHP6uiSIbX_vU^)bFKtycQ33rsN^~-2WKmMg{n!Gu%rXy3!?t5>a>4VXnX# z-OUYVt4T$hQrHhgC}EJ?y)7d!*jM_M*VUUZ1#t(&gOw-%St?{oN>1z=1C} zP+mosR~JAV#K0urHuUcHC2{Jn#sw_{x}XpjiT*En0C%6SJT{``?q2D~)k_rsULm0a(HjQ@m^4gmKdY1L-N^}wDsk>lYL5_U2wVP7WygZPIp-GTi21wyyEQG-zy-ELj8x>+SFa?i z5r_sttjO!p{98(FrN9L|chFbVkHUXdCV(juHPu)ayE;{_dXEiDWx4Eo z>NogxVpcx%7k)$GWLo(;nD<;xotK%*)YW`!p{{>E9~1|`f0sTK;s0BG;$T6q!CyOnf?SngH9S&3mQqL~`kq6>&U4{J zj~f5&S%Aq1(D$QdInJvAt8K>c^m1AZ3)I$?0g@z@ui$53m#xMm)61cLV*!5sTJ@ey z_#^9i+hyq&WYLUD-hpIv4q;W2z~RM=>@|`1r#wm?Xo<9ku{^Y(aTi}NE%-z^>>-!# zw)@@u)Z$~S;?eqx-lV>it?d<`;o(&Cm~uF|Q&1gip{$anBtOcx9W<_y z_2ugVX@c8QmeCK?_|XY$$FLVTXt*_;DQ0!7L|co3U0EiMJ8U+GNxdW}NtPhIyIbxn zaQ1Vs)2lyodr&zb5{03N-b=<16d&?H=@E-nAPv20fp<&_aD2ARtxoNGRy9XCN4&r4 zw-ky@ZyHJK>L$<4wkoojFTNQmwyRJ*Lq*>V=5w((iU+Dgww@##v|Vi&C)7rvLh`CPoU;k zqBb3CiYjsR{jE?T!(`H%7j?ezvTRa|4A`QEjd`-~Q8SWhdFVDimaE>J#RMFK&;2?6SLTiQ;=18b9GoUj`hG0foaeFo=BO`P?9Fm%0s5 zqE)B8T8X7tr)y8q^Bp#{0NBH)40o!s5q-WzP+3ST#8SP+vg!m_?vp%vC6_LfK)K;E zyKtV0A!io}exdG0<#d@zY`Mv!?m%z`cxWVXQP9pxM!iDZ5fPoB{wU7<&UQ?U?WG~6 zr6I>~z@5FbKxc+{RD^$AC7|0lq}!rdHA@v{GWjF1t&;C2@72||UkqJoMw;q-N30ur zO`rT)x6M6YjHE>jJjB3rH1CTVsdS2~*jz@iDJfBQpjZ)v!ww5w+@0GC+8=n$Rl6H* zD5oLO84#MPW{c_d-rzuWbz}t-s}5q0ukD1F+9eGN#TN{1=R3djDwtoMR%`nD`c7@m zb$3-f)UprJia3XP@2+aD9;> z&VMqGi2m2iqv5@D$M^eBrh~XZ=)AdS+<95**$?5Z`M2B8qF0wCGMOa!>~%hT_<&+< z#Bv3Q>=hjr*#=%Ipmi)zs~22fJ@_aXm5W+s8I?=?V#v#&V&{BcJvbQ5>aUb(p3KLs~(JLM;7{oyCwCDb1oBjPZ{sM7Ism*w}+whkUb7|9WfcB$^`%*nwTV~|_u z+|#4^TeS#k0sGfMTL|p+b)&IZLQ1U;|7>iG#T5H1rVtpA+HbSYxOW@NWzk+ymuttN zIaVwg);1pcM-%yNIgm7BKMUCR9(xAgpsDf7ZFeMpI~it(uPh{ECI4V)(6A59M<5ACCd-4t_DGoM`{Jn>>tM$&2Q|CLE}Er4$2tzucpfifa6W>RMXG*);4;`FMm&{n03`Y z+(}|d`poMf;VJ$=Jd~}geAH@nNDOTR)`E!ysW>=zkBc%EjSFCv9MM~<3`R97h=A|no z3vM)}NWFK{cE^XV_Kn(9K;|SMPKmIdk2iyN)IZ^-goOpJ5tw1d1RoXr~tUgkFH8 zVB7@x;JXKYXd@MyHu4s`F;iaVCW zNCC2!DRJ}nH6z?9*3d?hpf{uZKT(FPa*u0l+;{A|Z!J^t3)D<}(QanCuUIBF)nGvYqqOr5D&0iRuob*LT;s;Dl zWjja7j~IU4ePcXEo=y8+&7Vlq)J>PP`v5{!?$qU9xUFR)7sF-3MYr^*x_XHy6X8vS zaqooGH0zciE~Y$OvbfzfRH4SEfX$#XP?=yh@_sdMIm>Dvjg{dU1Z@ z57^!!6Kns$tTM^wFfV=KdVqCy6J=)0xR|)yojUmBevX$QT8jLS&*B1$JZsg&@xp?O zj@y+)yqylk75#fm@?P!n3t<2F=a%db)2Id%coag5%2&k`49>DP??$YeU5m6L^X^dx zZNzho!K*#ClQ}Q`MSaTFEvgR^4^ci}@HmeI0 zZAS0^6s1wl4ZhK{ws+)4cxNbXWUQykycCJEV$F~n}6Vo014AB zNd_H-uF5I=s8;?k!Q+}q=WE>aDy45P65(;}>FewNnkudQJ;dfH;?I#l8>Q>D*BcK7 zjs=F>!!b{ijU9z#lAIInb-ulROIq-jgtA5aNhsNK1zsnl1S4FV{Tq@7u-u6vDBn; zYl#x16^P_C{Yhwb#JrI|mEov#ODzASdFNE>u~> z=Bh|j)rh=lQJVyePf@&|VCM6^=>eNm9gVS9*E}=4oNss1;tq~OtK2EV+N^fXNgjqB z-!`?8bXP9282Osj9TlmnKu{U*O4txGW7bjRHbuGNRa{|u;n6~}Q~;z*=%T@)UY#NL z8yGfr(F~p#T0;|{=%svu9M%}WaMAk{TY+*xZz0k$%{D9e65w_F#DvHPV47+a+1MtD zn;JL+CROy^-<rQ_W#t))6AM$fRzD+a$W2&_Q>}6;`-v-lapPK>Bk*W4+XxgF?#0+N7*2k-DYLVqQ(Gu_KXee zS!!&~TAoKm`~mQWJ3w_|Uo)RleNQV)zT%$Qrc8n70nyx{DMjf#*LEACbu0m{-nsmN zZ+dKPlIeFg|ESY#!BM-fdfwG{*W%H`mgE3)7r+Co5MrOFQ>ixRC88x-HU4n9xY|a) znCHtxzN;v*72?fwsI;KJ7SJC~E}iKQ(SZJtZ?Hv;*`xrq@l~%tz+3No-*qEe3d8Y{ zM>Unzmo^p}kpr*Yj}KwzKira-gdBcPB+P zX-<<@4{Xn7>LCVsa+Uzz=5ndL!)I~a;}yhM)(;Ccghk&i_rEoVl?&|0`&ax13Tr~0 zM=2BUEU1Vs7!Vj3O#WVA*0rOwttC*JV850ce(~0@`_=PEYUtc=Q|cp?qetJ@xr0Q24A;t7lghuQi&dJ3V|L z?w9q*-tcWehMO{}fZfE-m5r1lC91H2LKA2BU)-!Trr=Mciwuh;D;meDY~E|lvj{CU zhRQ7R{JZ&9j@gLxE39`O)8ewynW#mNqXuRwdt2I8{V7$<~L$4VWuk(|*Vsvr#n>*-3!zKn#g2`f@%`i45;XbCt>*E?Yo)vy@A(&Kv z3(zBqb(G^cxS;fi%^Aw+T7~YtT*8=D^LC0c1nl75An;iGEO>^?pjcb@rUtvi1NQtn zArz6Y;4pSK`%BzB^v|~(>`qmQVgFJkvRpn>C0=azYX>=eT`zc68{X?Br})yiANuIv zzw@;AV1nr{A@g`IvHhi76ln+SzOG$u>=C9+y*WOFe%vMQolda3J$BdRRCfpQzkH88 zU-zpoHMqJWS$YAki;uUL^QMpl3hW(5?Qi`NKtkwyu>etvFE|&U7@jcS`0K(6Ps?Pp zGkBlo1`7RL2I;Sj+S{|uPWlkW{~=4nN68Wat7+x6m)fRK@)!+jPs8Rm`hBNWjiR6L z&Y-spj{hV}6lKJL{DY}Q?}^%3$Q6}5BVzd&Vix&`-(wnu#Na8zLWj)VO zp*m5_^;cvc9!z{^kg+L)#@{h{<4uK5k#zGnAlC_8bL$*=N{yc9jx63wiVq&oUm3LzWZT^pY<6dpX#*0~ z1+1%F<6BX$@lG?rab0vug4cv)w@f(c!esJ!oDLV}?Mu{1J62wL@t;7CC*yY$^rV$U z@SjIcMi-Q=+a!1`GTCXzLU4*Y-rU#jTp>79p#tS9Tcg)m?~U7{C2-TTPRBR4q(U$~ zlpwWi`w1tKCj=V`v7PEa4*>T$PIZZsSyyI(E>Zh=*O@Ny#Vs%}fiQ6P+xdu`jpZo# zLtR`Bw&R&(5|ATNy2Jpjb6q0F#Z}$k1P=M1&nPmuDeNTVkq@~{nr@iLr;nz*{1o%R zX!rGVJGFtz-#txXTp*UOA310KQpwWVE9lyu2>2Z&)bbOYqG>cJN!KzxRuX+bVkPGm zji4|kXRX)kz`zJ59HM;2xA@^EiSCck{VtD{zIp#ToL>jpy%<*u@tPZ`tQ^*vcCxQ& ziR6fSh)*78nQF~;JNW(}oSn&I&$E$^5t{5y6)UG%Y7wDe;_>lLoMs}O>8Pwm@e3AU zvJ&Qbu-#&A@Bkc6dKPkM|@gdo1G(X;n{#G9?=z!$n)ure_@lHopH| zo;ZgRIKJATXd1GvF&9zTwW+e`;iNeC0(hFO@!j=zbhN2}>v}i=m7o@|X|n2r)*8Pu zZ2veCGXqYXDF6lsb70|;kdTPp8hoZ(WLWbXo}ihOoIEuh_f9&sIl}O`3X5w#wG)6m z-*AXMyVx7Qf0yb^dYEdQP1#i`sXMT_!I|Wktx}n`aql7pU=vQ&N4AmX@FfJy>bVH0}BF7#p zWfZIgC^I&5#(70Z(aFje90Ghy6DmrUYEn8XWo5zurd!A$`Wx}~3{Q#XKUC2B?Eg){ zXrDr#c|BLE6x2_n6*4r}vGB>mGrH3sk5;+TxaJIh&^_oD%$%r^`jv&88`;UN`S_w_ zL1rjmC|a-1pO7k5V=DGZp$1L4%UlwV`?$zOLPFlX{Y`tG<^7#&%qncXA=YXhOlW9m zT{R6i;dTxalNsHqwti`A$8-BK6r55mI2w%yJ3Zs+j$zay4&SPXU6K+nVgsyNVX$<8 zd2%d9uTQimC(tws;0!fErL~czE}i_0?LPuVTZSe_^U?7qTW1zd(M89fu=EtGw*3Nk zgq8i`=#d)(>k~h;Sg+Btrp+6V!zTpQ4BKLRH$VRjpR29==|~>ITKVD*OfuK1uXg!O zGs3Y#ZUZGz_Sw7)901&!?6uDFmDSgfh%e4Eii&xF-=d*WYHHq0%{$GI0Zt+v-m4Wd zsz=}_G{$UqI-so5_U@Z6Q#=8uqn6k1d8$r&_;a^=*C<3AEfMB9O%gNeg@{5$1Z;n$ zY~&oge%=lMZx?8pqdUv9&#<|>FgDTH`zrAGJSY?@z$CJE`HCma^f~@OV*Y z9lR^-^pZqEl>6J;;sHcM@1{M!;WxA$1duYt?0o5^uBO4>rnw~g- z9RA^l9Ay*#yU531xt253)QTFvzI0}ar3JPiyWFF9bEf>iO6ir*|LFe7@wtP zoPk-MBnIC(gxGfiAx61G6p3hMa}3@TcXuyK183{X6f#QfOm}!VwO_E@ZA32OV#^l5 z8{h_9tG*zdrsRVN!h=0ya~`(UeQrty#iAFh)d3JOuCk+@5IXS%2Ss{gRQN0d#5xH@ z^_dOWn?aSHID%@+T4_E5)_Qc^A-$f&bRz5u#9lvmLL8w&WiY~3H^OCGr?$kcDGS7} zsR_wJ=`ptIi<|d0YIQ<>k8A+=@G($1%K5qO6oru&ILj%o`+Y8{oj5zd2IUF?x7%Jh|o)j8Ajz0qvsv=8W6l6 zO9E7O?sfa8vn6}{2iB?B5hc20BxRBdVP1eAw5MqvR=!ydUC?%@ZqZCNGh)%q$r5n+ z>hI>9nV%NeTA@npmzQ^^a}Rg1atR!nu_W4`R30x19UZpiR7!g3@!#c+KId}BwOyf- zrI1%@jMzm+*hKTzl*PJjvF5p4!Z&W*OpZ@<6mw@VZ8jOHR)}`nntP#I%?rPq{^08p z|GxSRPsbhx59;_{9oyQYF=OtRK&sm~)kx)Tvx|Hv zo0)8PUJF6${Lq6%hNXT4dMa-(j^2#nHi@j*>4^~7FwqSBF1;3%w6L{ov4=O6T_={Z zxgbp6iRt2oQ=vRV^d!sWZebhk0~i*{nKOI`_~@i~B|LhPENz)NbSTzcmpzuJQ)N&M zput_RA;Wlr4N{l?udu<6n7!gsUp}{`6v7uv<>RqjMsP!sM~9Qq7dl6s0(#A1dA)j~ zC-7kZg78*OHJQlXp@DK@x&1T)<#9-lty!<_CWj*a!5n^UyO*zTbvxLIeBp~t>8-xk zP}=wVeXr3f>{FAx?`IvsiDY@5&*J=NVBozIF!1)@U?4b|d9nATkiYf>@DKc}ES^$= zodXo0bs5iNZ9w(~BCUF^IVTM9VzPs_(rPAQbyTABA{y@;rso2}GLK(iteC(((jy_> zy3!FbD!)BRkgjm>O@|?~M%O5^4%1_HnmqGIg374 zz#4eH#?~qUjrWV@Lu?$Z7!N=bU0Wa0USEn3L_Ua;4{qVCJv4)tiZx(!xi8CRC>2&D zFmzVRkDGp%xy(TL#T4kNs)nxl`i^A2n_e;7;_9Hij@lUN8Gr6{NzJpAiNs-(AX?w| ziIg0VORw)wRN3J2Y`j+Y5;2ABAM+EiUw@*H6s@ExlAOM9Fhqvcryk zzxsEaPsk==1NJ|0KHUF=^L0ps>9`Y4o36Zx%+`!{-0$j|cYVI|IWK@Ny zs#ZgrXFO@ElgG4Y92VPjIs?H>4qKk_<&VcY9@zf zs>WC@HI!Ff52EMYl-qDYB<47yu%_4y!DD2uw(ZxKhl${M@ypP4rnxIQt5pTma#!FXoQzA!6?z(*kMf{zxc@U(2}kaIGiA#PIu$ zOrT5xm?Hc&w7(XA$O+tDCu@#j2VM?_tEA#`dGrdS_l>(C z3D7P2w zd$cl9`acZ$%qF?qw-@087ebpNjWfl$U;UsuNHoT&fn&^Jx?ikZsw6M&#d}u8wa<%C zY2sKYH_sEppjVUG5BE)0qos?{(peI=X?1H&qWSrlBXMCr4!9IxX&!SOc$B)8UmI0S zHnX&s+wAmrB9RQvb?mKsuT4RCr%L!(j{Djqh8&X0Yh0s-pPd5!bEPi?cj@uQ`k$bZEKaSOb1!R)68P zT4JkB_S|PO$YYKHAiq3Dd5qp$J_eBr_{1ya)^Jxu|}`EYZmT#d8^?1R2vq|(C$-Y; z2SKzr9i2L0xl6%gU||AQ$CibzZW6`wDh!N*k4iKY0$uDEHFanRetJjwumbaoC?Hp^ zYvI7{J;f^MZ(~*!2S5f6i*MbSF4B4%D2lE`{+81!|89o41rd_VrA#&dja-1a}|FAuL?c@23)Rz^be+#j$n!BC0K2YNfBh>JN3VwPv9 z9iYGSxtMOQ&YXgDkok*u4C%KlSgxJrD1X4cTohbc9OeJ^l!-a6c|rsyAsN?X`aWJ)cVt8xWDHxMs1&e z#scjdH$mU$L8Ac=vLxDosl|C04NZ+O@*Guc!*uX4mVNJdPjo&u#<=XUX;5fF$povu!Q#+tA?OFh7fc?)`_!Pg9UL)+)m4qR)v!-F+6YNy8G}!mnXNuwO)jdG_21td! zH#4}o@>cZrmXIa=0bDcjUHPH*?*jvQx8k^tFuEJAcdQSVI@eYmOk=9&AG(|Do#~}u zU@0D7IiX*Gf4U^bBQ<53^Dc?sl8IzrZ(I?Qo2&uuekmr2R(AWX*D{r$%{@1p&E6x+ z?MoNOO2_c14S5NV2USDpb!)0r#Se8iTqn8&o_u;D5C%ma`=wpV3N$1n`khC)7;87( z`a%TpMOhqxv`0rf>KBRIS%Gf#E6*aY04 zZ*Us_sF%v1aMz;(lXec^?Z-XU_gP(zoPDp^!nrhlQJ_Euxc~HIt7(EG6F8$dv?Htb z)_($>E!WIAJpYMZ&k+;!z7p{03$r(Ae=~@ZjaBXcXi586b((#Qvw{iqZ%TPypSKY$ zaRtG&0ssPcd8#4OA-}yOL_b(}v=OY$<=Ev==n1I48bd=Zq2F5)9N)Zo^P#Ve?!%9* zW&XfD;I9!~dF1$oKlFgAG~A7Mg;$u!A>hISG)IbRHKL_2Ttj7Evl>@5S`N77@T`Ce z=MLfK?YL5@Vrj?J7Y#V6=IJ`B3O@G=wO8^2{Qaf7jRV^%+|BqRabk-i*PWk|5o%pw z^C}F_Ra6~jX)KmcY}!n}C|t zN@Sts$ChYIJw`x$;+9nnW8kG$_0?Bd8kNI`_=JRa7mBa1$O|xTL_`HMdM+OBE^$e! z9otQ_$suRtPvEpa}r1O#yV(ObWU&LiDN9DD=KufcfVdRCh7|VB5b%b;n&! z@O~Zdk&}kvRJ2>Vr4M!h)I*mD{ln6f~iF$b*?gTy6p}ze~&E z26*M`>ruS?z6py{5OWbU=O9PkMwDjKB+*NW=XEG2RAjDW?vi@7)2nM7&*{R>C{9nk zfxPJ79ygE@5oNnl#VB|SX7*~ZE7@GvUdwYB##nisPM^0zSvs=@S#MEO>#A&mp+VI( zn5`Q&a*v-bXT1RMT*C}HLXP3_f-gUTEt=^Odj1mjN9l|woD zDP1jS_qfM-_R@u9z1_{N>dFC2>OJ=Ud-xR}4K)7gO zqJvuXF?pbN8MY*RnHWlok+F_!zjtVa+HScG6zCpI;xWBPDw-j(jf@Xe4|?nW}Hr+8!|~=hE?eS^HjKu)cl$75><*L3^3^~R7HhFOG{x5T+q0rty0=+MRXKp_et%J*>kWuYiA}d6BIdwRS1!LV z86!-_wvXfjqR4ZBXl}?JmJtBARLro$;y$RwzQ*(Y+r4$?xqfl4XGdWdugsDXxOlx_6n_8uwQT;?rE9SIcfUg`r*4(jF6?L0urWv`EI=?Q0*I0PNMG{-_LClR ziGl#*A&GdIJ^r8Fyo~hYFO<%>|MVERDb5gj3=*Y>r)g=|Q+i=^lz&UNOl$qWWnd@l zy;UNV0Lqr;BpE&rD<@A6un-8nT)Dv2GdSur?1Hb1UI&p^CT?V2!Y~L4nBV!gurR9ox8PiS; zhi|}zta;{rwElF)S%VaxfNo@%nl}8qff7KQ2>E~b3m6yW06oRol|p!Yp0XN|Eq6(L z`C46JbS8>TCI2~X7uhn+9>?2KK9vIWzEYXC&8Nr-1dv%7$&!?Ivt04@Wqui)u7LOB%G)t|>${P+U>EH8*PbuzP= z3%$|12FlelV>AsCDjiCsDX2eZ%gKkt3Sc%L;{2RD4LFZOy&qxk>eW&EqDg&mZblVf z4wyb>2)>NKCz=uA5)gV8UO9o&?}95%6Lff>?SFlEDzT)0i*AG@n~NTt=r9Cwg>rw<3B4($|oOAz(iWY{!yg^{6sT*_L5qY8)z$i=R)|^BMh% z1-t;7e^v`q)zc5}HC_55a`Gy}o+=?<=hVRXs_&Tk6xlXIc?61pJCt-$K#j6 z+dY^*Oa4oh&d2uE(6!>3EXm;IJx7ga zZZWA#Q~bt*yNz_MxuP5K-Lf-olhtY#eVUy8p2(UOzU6AAblyejo7r99`rtB>kgT=#fK2#8Mk#tO!_3Xy#5u6)MXM zbsqElqOWaxKi94fpy$Q$OHd0NUvXk&nB($r(??IYx4O`yGV>*8@twTXNR2L_OEI>t zS&PiBhHLOG_bp^QCsx29rB?4WRa7D@YbQ(kN}+U_*=_wo*0tsfIi*3`tGV`;Z`@5#R;)FE=P$z5CZOPURI zJ8fQbGpnX^;8wO;?vzCrcW^(2uE&cb{y}yqhj!b_1Uli-aD|Ac`;d{EwuP?eUc}az zi+zi6p;x74$d-GTlVo;lt%E?Q|{e z`#%I!SZ-UcZ$zrHGHXRTt;h8ZHASvh!xB|EW{E8Rh)TmBjK8l*l z^?n60x<%y?eeD8i*vEpJqy3i+{mx@v)`ceR`BQ4ouGh-rk9i$vi(G~65LeWBqoa%a zFE680j}3kPfEZf!BL8N6cJ^X^B?-JD?w3ej zD`-Etd^()^H&+{oMR(!u%!)|4{k`uhsz(`t)viyUjf^{1C+03-psVTL?ShN!5C|Rq zytJK;Az6kHKXQuF@puL8jew?VLl3CHY3)4wsRB3;+}g0n@VaFwmlGexf%-NuJIZ39yG{LLTZ^ZRrgl}TvjhFVMZm_(^aG!>|9JHmb zuwAzJxp)Bo=(k#gC3b`V%4DvH%F`5!cW9bJoWrj%M?w`XbmH#cVLORtZ!en;?;+&z zNSJTS-tpGNzJI;;JYM>&jzb$?ck(nB))697PI3!M-ere@^ zSRi3_^=3_XF1&X7X4wJUy+Kk07v>ECp2|nmSQCD_#KWyQ)UGje)I zKK{y9N^t8_B+D7HNuvcJ#Sf}+dcWjcSHm}%%Cg=1o5hQ#4rW=p&V8V3*EzMlrKUS0 z<9b|9vFBY!BxE^*&>kNR*zCX{9;y$Z{mn$!lh0T|bi~!dVGvg#fXm}L0S5F6pXIva zkD_ZONS}tK#SM5~cIiN~mklYya@oeV*AQsM+6FEAW6*^c&$^JDnE=m}$rtTm_cb)1 zU)lXAuXt2;)yxcITj%BFEkWdc+BWBJf_2R#LZ9ypJ0>12e3ha4z38_ix=Ko72KV6~ zzKK_?{Wx|6ilA{WRWiuUANAySQrmLgP}}5vKJha%r;x&=2K8hrH_^kRP{CR>RFR5B z?~^MQkL{-}-`~9}b)2ZX`n>hhtgXfhqb|RBRRqwgNstTC(9j78w*@c zTi{;_Wrr8~n8$`?B~4Z;+{2uSh1#`axMV2?F%7Rr?PY*p4c<`fj7<24gdGi2umr3i zLE6yYw0~uRK70U5HzlPK`rDeIBOk2I4vYIYcmA<(N`cipuD}1x;pX(tsE!B%r#!$6 z5On3I>HVKY8v>I94A8-EKh8`K>GNRd16pua*4_DhnAK?nRMbE#Rxv9K4Ka-fQ)~%8 zHOKJ5#r<6%AfuLcGlaC@G}VP{pqpj(7&BbIMDqB-ZRN9s2Bbi3u^>l#NY8XuGy6<~ zT;)GxNAxrPy7O;kW;haVhESbdv+nS}63I_Q8TXCq2epKrpMjPjEsC)OK@;<3KxU=&z&7Lz~WYzXLpOa-gm9TrncQUADKspLKlP;Wxq+IAxVQNJ&{|I}1S$@cCD$+2 zW4zyXA6m)J9)><2eabZMBJQp2_CVj0Li3h7eN>AEYlJixn$hWdK+lD47S_ub zC3uO1x%l49X|V^A>zFKj+|}Fk=)7X&r+s*5j4{)a!|_)pB;&gP-Dx6wiR5RG@q13R zOP_JiW~(=;Vq8{9dC!{_^i=YhGa@}G7Q$>nWvGjBLy_zpHafdp-2c2>dCWUAGxOUC z#wrsm^3??L_ICbKx11YF#3jrfhzXh3qwCpMPy(M5?AjU}+}+ z3I)5*_VTA=h%uZYQOO+9$015P7HA0e&YtnGy$b3s0qG$Ck$?GWK@%t@Pgyr1D z@K53X5Udl-Z{PgGDhB)lRAjwOsei{B7&EBI9^TFWhf~ZNaDpVs-B029JJwub0P6=c zBj4ZeIlT@lYN;fKWB(mYl2B3W`?p7bU+J3+h+0+iVO@V7|Lhxz;oxbLqWk~h-Qxn9 z_gc;K3FY5m=`DC##Ve##f3J@uoS=DYUlgSOxWAm;3!(&`_N~7D&A+eo3p9^jXG`kE z-w~5p8a%C%ti?Z!d#XV5{{J)>tnUWjo=*_^59pwT>6$A_WgAg=_$u|uzSJ$OSEtr? zFlHYuI6DZFj0;|7+>uR&!|tVAzQCdllkFAzX-6ws`Mh;_1(tMm6{FOt0)dutdPwt; z0urG{sgFLDa~*zCH<3I<8(Z3GGkS&pIX$c3hGoXv@ZO$24LP|$BkqC6MsePdlrMg* zrvsztTg?|1dQ|j*e8cn@S{T$DBadRS22mBW{l#fi zWiEw`BxA9~aKnbl>i14QyUYd?z}hsVD{h*4JSAW*Al-ZhRY71r5C9QCi}d^=s?LIf zPbi+Dry}f2$_xS1O?r}BE~l=A^kS$v@`aUcB;`>nRS3*1{qxDQUAm&EnZ>7WX`exg z=JVv8{h6;2SZCzZuhEeBxY6UEw&5`(8MEh~E9;yz1<7lO$j>t~OWZPVZ*j#Tt$7Iu ztmjw!say!;mMLtk?2;1ywgB4@AKF?L3{F2u?@G+?VfcNnQoEA~VVy!p7nmlFCiW*g zy>+-4=lWk_egCXjXWvi+`_HgZUxvSf+GXIHMSoXCPV97|oHZJ>=QA)vQ?#x> z!y*55pEsB2=|iHwq{066(dqwsfJ@ouQoVmv72i-h=dvowihtjF9uz1&ntA_-n))DW z-b(wz!hF`Iv$%W#eG7E={OxD|VHhIbu(15qM}zbo>AKbnL)HeBi@IvO{qCEDi2kCC z?itN7y=1OhrBPzx#o)fisoH3VJqERb9u{tk!SK277H~cqEzpNJxsZZC&<^Se-aUfZ znp7x^jXK3F++lkq@AO=<(zZ^o^S4=j&=-h*sNnwh6!9zfXN4O4NlBRfGaIbMA+QFu z>saSqdhw|$-=T)GrY64Z1I!+uy`|;K{w^6#BcM3&6Gjfp3QhDYFE!FZWnW+Tu5|@H zfc99jdn~dGU^?r$$<}3qk9H~tIRm!lZ332(T#U3`mG8j%^ z8hMU>w7A~HVgIRVGaBX%`n*HD>cv-T>3qAcdx6)sH;WnY3B+U? zBZ-7>NuODjC-#$L@RtH;G{1?)tD+fTUPe(%1HWfzY*Phyr17vncV{Ff=9Yhe<~nNh z2|5;u*THT^7TSJv#Fvni!t0nFgPKbXRjp6kb98i87R!8qVaHS({7#|UiuItYr00o@Q&|CLr{~4Q9fru+KK)Us zunqT+=_KJ1U$Csp@b>T|u~Srv5($$ zhm2Y0L6V65j>FFLlBGx?%`rm$LUE$|U!zhG>n#J`a{E&q?PCVr@C(%qtWAvZbWrf*E2yYcbbeS6)`JJ;dz$ zv6XU3zcyA@*2T?21zuP4xEUR|Yzli|?iO+VLq$ zhMrR)RqGQQhy|K;JSL1PXAfnTBBD(8wx|y6j=r|3Tj8JuUg#K8c-~c;o*JcIVH1IL zuURsBKb?>_36ktq6nGrDwF9f2TD4Xfnq7GaUD)4AvRt8q zMrnI(efM$&km)D6w}X$6##Xg6jETfkGot}krM;@Uy(?o~xgAXe37(djY^|}66%)yH zEQyjYeigY*!*7nwRbN=^= z=?$3umyP%)Eza~E7SaWNO5lhj4lfB)Cgo%fZg{(#Uk9Wp`vl~Ml>n~U; zD}7iu>Zpyn2pYm(DZ#d-bu@5+gG z14Woq1!!vQw+fq#kB;~#t3N$eQJKO}G|xWbZZYKA?v2tN;1Z51F$g*+MD2XUt+(=2 zS}m$r*;NM0ZTKASBqIk{GY@-I?UC}~6^@$2wIwBi>vjl?(%9o{&!a8=<60c^Lkq_p zYpZz6!te6iy=q|3{>t%s!jbDvC&j2conX|U>P|nM-Lnx1&wXpvk*BX`_t^4Mi>BBNNx0CZBF3qwZE;t`_wW|(Nbdzn>s-A~5)n&Jce=EF)1sHH zO67bZ$Dr-lO(BRyG%og&3Q8WhT)*f9s%knnRRgV7Q$3_}EojTW!#$+G~4>e{6q>s&Vw! zisR^up|SlPHC^Wn;>(|JZkix2(v>xpq*;cm>KgDK49zj^Pkq%5Y|+_`iR6vfl4=~z zT2U?8aPm94O{L*HUXM(t5L$kjyixBxWL4A6w^CYsv-frargan09q&_o8glL2kiu5{uJ{P=?P zRZyy6{U`ejB*&}gGqFMt(TQepe<=qD`7q*h~oy1opxVl*l6jQ4@0Z;Eh)P@HqS;PG?Oh!I?FOO6n1myQig-_jD=TWMTKW(*VjVqLoxgtT}bWdGGRy}w&Q zWHLB;U1UvGJDr!n9f2d*-prP3DDF>G@=-WyjmAj3Sl2U9<;cr<*r`<8xoykKwdd#E z?XX04b7;S{wewJ&w3oJhE#8J_%EEhvIKc*Mx9Jx%yO8&SZb$f5*|?1??C#IhcK1h| z9o1$Xd7avd5k(p8@`a+yqF>#127aQpqP)7>Mvji9SE>)_4u+accBd{9i|WU2OiT;? z*sYkKdBkaYMN%`&7s3y(lC=XQV9w60k~F1>JV{cO8k`Khwf zyP@48GaJV&LRet##X(mft)kmhs`~QEj4)kU*MWY?#ZtT@i?-gQY}P^RHP>YP{R;VG zvZy{?EQHDR7y8v-584_mFMSYC*|rTF6KigW2#P2vYkGNov+OaL1H^_AF=RRmOMeV=W; zMFY1{CkLTGnPsiDjZFu4x8EW|#0qXyVCjWZP2b)zVN%#irnquJQR77_+2I0f=5fy$ z#xmGpE1v4pY*ph7H?Q~Qpgu8$DI5lsM-1n^HwH>ThGcHsh1o|J^x z)B5e_zu&7mB{;v6lNp+AB9#XGhy1s$NUzP~06H&+Qj%6^rMHMV>ztBDTWSyw8`R!N z&(jWz@+eG}1C!w#*6PA##wGE0DsjF8VYpf{bL{1k@6T$^T#%zbKVSP~_Vdew=%Z%W z+8%Xl4&LLe>_(-T3ftHv#&61I!vcGolL{MgU;KKqHILpOxaAUg>~`MXiDeTLofw@z zgk#~qD`z5pd1ae0S0nd`D*cC%^{_tI|+N^Z*oT2 zSn4jpujz=7W)6FxS;`@T%DF_ZO_bf`A5vD3wOaVppG)SmF>?F%7fHF+41^7pT)*X8 zs_RKS7P~snBNAULZiropqlIo>tepefy0M9iPkgww=ZNQ&_-9@|kr}$b;V|+#tdTC^ zHxp&AVR9pK%FunhCScKj!5S|hcBNN$kF%_Lj*CC8`pLv?h35jY%%9sGr?l%au%sTR zk}+4w(HGtQ6eJxN7UanH2-hKN|3>NAn)3i=lLB!cW>kE=UJn(?YZp1FvQhEc%a>#v z7=>P)XZq6J+x9i{IL=B0*3^l%mw-<`xNr|Ez`}8@e6IZfUgC=eW}cDXCwPLO$gAmdhPq&=lOp2?0Mer&-48pHjfl(?w)=se~*#~In(%5&hckwfbg^L zkBg*ynGfJOZ!$qA?3ryBP8OAF&f0)AX4qc_QnWe}A~;{i5MmpiJ7H@$|#_ z;@tPs$$q)1m=|VY4dxmGX}9Rwq#XlneDdD7ZS4f_^CxedbUowhxEB&8G~FGK*M{AB z(bf;C+sXDopo`&hSZ&u)nZ)1usJdO~7;;5h80k6YvAGIL25oPZad7qC(_fdk3;RxU z6IQ{|eP{bfd=6fFIA4nxI{~XbSWV6`xO?yPh)9ff{+6fDw%7cIv}AeS*G4<>rs+07 zgc$jjFtsPL>G%w?S`d^Hcc7CxC$gRCY@OEdtY%(VM0FS%;kez4TT1VQonPli-IFH{m}O0Fpf{^K z73D^q>f&VKe|lM*&AETYkm((av+;NX%F>A=>ahN^&YMW*y$qAXJdUf?_WQ&s!mVQc z<}@2>Dr3|hNWAFLSn%71qdsD_S_Y2IT%$Lm(td_t_N+3fs8~5+~u;e(zDW~E7RK(X5a??1$3V< zq$|1IMSM4pnFRd>fgO3U+LAy9p;rLhuRUJwR^C27YivHmx*>Y37d~XKDrG*)Lj72J zj$0XM)&enEuoUOa9KGsksJ>o1Z=bEW|$nDkra+$RGIg1&^zKzDdwF#aJ zeVvLFUUyJe+hKOMj#{blVn}(;fsjAv`9pI`#tX>>JBaa3z&Nu^4#Q|ws4Fu_CXs_NueLxxA5%B!sHgt%d`)~$iD73(O%hw1yE+OFZ zKW<}pq<1j9amVT>4~cGey=!2P>Na)1I{4K9vy%1@2N$@ z-3D_fKcw5d*H(>o80aGQD@!S$R0Mz1@VYfEa>^| z)2GCvN3LChY`l%K`nIqUY;$^=VrF&n`w!`y*+1H?=Y)Y&)BQYyACZN$GDpQ6OJ}zz zI7%AMgVDp|^5TEcN>dXD@+04Rm8OP7dKLPZQVWW7PgWFhyo_`*j|4Eyw(22zL>{{7 zAaLf*S1F(+SPT&aQ83Q8YR9u;wz~Mc)W9n0z`ZHS7e(gJ&Xe7cyPy{qg;9_o0mr z!I_pzIE&V28urn30ZXoe?lL#(pHY5a?$?6B4B(hlNlTK4AeXaiPJE&Tg&_@zfAr9r z1Xxc6H7uo7Trv&cF>qknt z1BS=Kd@Z}4i4vC^Kvb#MEI;!1$qZ7^8nBfGqDh|wta~3;2X;vyhN0@$aOGMwI^ke= z{_Z5?XUeSy%#%9&!_R7*A3B5K3Cc4ii$SLXJDIa)e@9DF(R1hrFUvzGpJz+5;TK?M z*1co;o3rQBl~?DzZb1C6{@^J!UWxpgfW;frUmy~$u6YJ? z$f_Kl-sy5SgeKV+58NK^U#f@$B2yhGW=-LLkPIjPkKuB28p$Q~JzarZq!FjG@-0vAcO_=D#tbMMjmXg7s=l&(^2#4@l%Pwo zo?a#2fzVlHPpQPq=tD1-^G;9;+w}Eea66LolMTyx05r=oxaIFK>RvR^q~y-#N?QBD zVBZ^C#M3 zi9^#DNc-|wf4SLl9InwpPUhvayMKJMmbp5R!}w#Ko~J>V*i+_4%AYyGG`b*kx5&#l zRJEGh^p`TOWtk-Q+X(N3skF_NRrmP4uL5Fj0~h1Ugx=9Nr%Ha}UU(!au|nuI#kbIl zyYY1ZX)ff3Hi_M!-f(s8N8JH?273b5dkUb?vjuWi2)Rp?rxA6hK3o2 zX8ucM*w6dy^L{z!d^p#+_6N9N#eJ{)*B$&+MGpTu<@IaVuHh@lORHVGhADdO8U`UQ zHgIKZvE=EsYiRlwQc_P9q@?JcI@+09SesnC#vJMLv03Rc8d-q267l0tl3AHwGn3fc zZc?h>Aimxf<89raZ;^p`LrKiTL&kHxx=IG0m!C{p(nk`@PfB`Z;!dVg=Z4tlo~#@3 zFo!RPj$Z~=pYm{6VL2XW(cok0RXV{Qo^taQ3)l~pe!}Xd4Sv#vYdEDd%5_`a4yRXF zom?w+iDSp;aH@NFLc8v8$5Y9LULET>Tz@Lx5=6{Sv)G~djN4s*qCkmxkve)9UyMs| zZ~=Dmgq@PCiFjL$TT$)k*+lBmV!n_4!-isS(?`R?YR5-a<`K@E(eLbu$-{`h_un-7 zk$7a*t~HNW@$Lt!P)JVa)0i9H4m=e(?^>6iQGDar{Qf}n)y2snVe;INSg2+Ocmz(; zU|-Mj*G`hbJdLw$OJ{Z7TGyc&$8#$qqlxL4a@VX0-|q=sL)d(Zy}4vF|Hu-@5lQn9 zMoSZ8s!!ONTwg2&Jfy+-Lel&3Xzpn$2Y5Hd1&|+c|NnAtw&tLYr z*CYwG_}F3}|Ih2dV{d6ZvH$yViI!M&Oi{hbVvXznW0_y8;(~T5{%dvMU#;szaC?(V zHJ1O}_4i|RYhTU&u_>k=i^M={Y_DSOKeh%Akp7QNiT-aK09sOf>+kd4Cz4lCh^&J1 zD0Dv(wR<{8SY>K{di}=rUXMyHNfOBjObgr(ewWbS zBbDbx3lA;6+1Vi82m=6$O<%aqo7@p_p$JHm1E*J3)p_G z;LDFWq`WJo_F=iD31hW8;OHGqLdOVII67jB_}$JXidV+%EE z#ur|UCuhhNe;1^^q}sOU5`k4@Cq4YCC!?fzm&gsT_cd@jn6e6tr8%gK6+x4Xh@xqf zy4-~P##&CUP#}(|h~7+x2L7*C=L@uwYh0)N^ zQ8k*G?5{<3YkUp-4}vW~%HhvH%qG5i{-Dnv#YOLkIy$<5SMbY&pQF(504*I`tcO=n zGZ^^v96>_Vrz_JjFvxIm4fRS}_AC+C$}Y71dE*Wm`1S^`c4=?9Jf`TY%pSo@LZ&ML zfNBBX53;&p@A!uqyl)T8(SU5iWdO|IFzG{)lfy0f?njalU*deDAN*l(-v=4v#$dko z`^4~|04~TSm-&6Uo|A|-{2>U`R*jD$j&NUt=!2=dA!NywZCQ*d7$}ZmmZ4+#jw$*f zp+B39M)S|QMCxc@#*J#h+bn!$!7|DMIG1Omd-_2lbXAdUWwZD<$Rw`$z4UvbpNt2> z?1s^xR=nbMy?X%5-s+WS_y^0T%2Kql%dy9n&!VKB{ zcbos0fcA0wgU(aD|0#X_KH)lV&B@feh%3-|9CeZ$gLk`zE6f*vwvNS5u=LqgZH`}D z@DGi~h^N4h9;kMG!%{LHeEWTIvrkNcSz_P;9#z==OVYe#ox$6<1b3Xb#$;%?CDD{s z=WKAd_6~GH!kR1Yi!IZ5nR5@G(F*!<;yZPiH=Gcfk8lyj2uz;|)V7&MF6#yn-DcRKd8_|V9Cm}MejKJedk0%^>I zK0whbz(jYoc3vEBh6FHp$N7Mz#SGw)BJhdxsJK~j;`chdfbIO` zFqU+kmUiD48-HVZ%Vm9ppiY!R%kr93G`0x*hmOw_&yMx{hLHZ43*{60xA!|=^e=F2 z*rysRP`(@TL9Q^&ntd-+eex(@$-M#es^PFCD9v1x6(y?;h$oJg7`2hD*f0*HcY%SZ zmWrs2?~`NmEUm(wQnS&*=TFL*lr2X-Z5zh3-xzf)oUG6)iqkiZRNbDJ6G|T$+=eZV zP&j@KQd)*0MVZ75BhAn4np`vz+bjR_^t-`%kt=cmm)h@IgGkZ93$;t9`%k|M-m7I= z>KXzO37~y!5fYS=R}^NnH;l_>$`E}~#iXR|M>x9*cg^(ru!RSb-r<&4xx})sL=RS< z_|7Xqo0g~0vGRyB#Q69mjMii3uc1mUHy>>{)kf`{m$I0Cc?*I$LFjc>iu)J^m;01J z!}fmB!}8PcqZRv80}o}r(?BcZxt|nVpv`W%n0{(xlk#mZM7g%B;7CQ**1CMBzWT_>@>PxjMeG3O0BzjVw%r!0m*zze~R#PM@Og4dk{nT4c$Pk zdC#w%k3k1emQwIRR`cB{wx3PN3pw;a+*siAOapIM6@v`6^s`pR4fvWHUrGnhs;R55 zq$BI}M|2O5jgLKbA?n!pyE}(7N~($xX|tQBvSCzH75#=>!_A`hi*Ro2$y&;VlU6$< zbGMl8L&7=ar{Wh)LnU_enfeRc>~fM49*%1erbp1`{G@72?2?Lfh#O-gi;~vEvaK^X zETTK7U}vg7W&4w;RFzxYc6PeSxyYK?$w!~OaP*tR=!#occIj>Sw)q-ayIVP9Ak>A-+jb-u5PLE$vI2fhIKamwqiF8;w9 z3sH2RlzqXCl*8boLa=q$bx%xek#E#<(Lo2Hu8E2S*kP{Q%Yi7JN%s;fMO-$uWJ%87 z1W_E3`{|RsnwnaUQ@je7-8MROuCho3G2-u<>ZC$V>ClhiCS+$rNbHr(|0Xe!BCFNR z`}FkZU+0O5+OEy=FhRTJ{`q&1q?R@df7NRK3y{BIuA+$#1w4h4jv0PzJTtN93P$Kr z@IyHU(}+}t7G!H_^o?iQYK_+x7WbFlhehdv1r{<-bMo>ggx1QF>JQc|?dV;=(N$?? zH%0Yzs)U~P(QpmHV|HV;-5>&xgOtTqYwgs-lR`+tR=BS5c!@rj`%kh41q{wDVY2z+ z_3PB$L-oo^+?Y_Yy6vfF`->wH)|1s>9%#`?Jf8?u9N8$B=DzRl6h74a(O4tyA_%;|5 z8ENEi%~WH~7!VLpah|`WR^+No;Q~9TaQseQ@pA6I`}PV`@4Fx)$LuD%jW!|rLh?)|sCi^B|yO2*asnIf)F^NAH+rv%BW3!GE~0z9QM z3wn3@d@7v{Q(-@An>o|&YS`t4c=)iFi(XG@BspgkE)!Ep#4vPO3yKb3^~q0ll0I1% z5QY^B?4+Jx)fnYK?57>zaC_YlHQGykA}@djiFn45aA8D!6lPI%y=b0fem7tI$e*gx ztgl>qgTEcb{h42_cE+;-zBy#L-30UY;MN$cgTY`Xt8$eqO$|s7>0|5J3T0V+q*>i& zmSH!)Ra3P)G^`n0#5w3k8MkZ)#-SKH4&myg4Sy(?n`Yq9jZ(4K7&JPF(;avDdbf3} zE?CC$%F6UHdsY0qyztKr-6g3FoBLLr5j9SPDehBsh;#LRQA><_ph`C|9uIuSC2378 zDT&91-3JNz>?*jcWj~8i3#}mV8pv@+K>41&PZOIV;2d&kZd$$W6`5>TH%PntW+SY8 zsP>e_@qFikKAGRHd~0tZ`*6~3tDnl%4_^iP*St3@mBZsSPXA!bNWt!tOS+A2?QyL| znLOlR0RDBk;NpN?T23M`oL9WOz`^~oxBTF*XU-!O|zoR&`4i0)9B?zA8THwld2L8r;o4NIfR)PE6zVs>d6x}%6^ zTdXC15$0aepY+GQ^4#Q9RKme^Q*2WE7ghGbvj=^` z+ckbn;KVKBM>jhRZNV8$vvE)@s*#b$Q|gVC0>4C1$adwbGsDzXzY3eVV+P(%LNA4fj>0T^t(eY zZX*ogfq1`PT8q?v*nDJJw_?XXDbe=MA$o@J^R6%U!@$QCh;VbM zVFC@b;*Gq6RVB)xspdO)lI-zLr1{E0KgZ~tFxLDX9k?4TVc#MJ8?uu-NNzJfI2y3? zaWM}dn4f6&X|6*vwDzD`tAiKNBvs{0@Ae)f@XxbU*9Ib-?a^sKo;k;B?y0_ktJ$Jt zDcc9>548)6&Y;y=As&Y2r7=Q`pEBn^ZM5Yi$zJR?lBgCQkLHq39q5>um0&d7UmGvT z$A_1?uYPAzT+ZYVND3FMDe}7}msfBIpx`p#p^%h8r(J$!X&2O)s#O$IoqJY#{RciwQ$mh@?5* zbg%jzf*rumqMXpNerx9Lj&qI!9k`|ZHR7i0`pJxj$f?J@Om6>bGPaf&`!N_BiK`-UDDA}%*#)W z(Zm_^Q1Z( z5(Y_b?t)F^YBt)tg{t`{-wIfcu&rS~dN1@37#|6rLYNZQJn(%5#D%cl_vvSe_vztW z_+#8IOkhe9=oB1tA3Xqh+Fvj6*mdi<8K<;zt6n z;{OzYI2)u^m?azw-&gK$f{PKKyV%^(QwT5;`%!U*C`%D7)RQbt*FL`3C`x_%dJP3gM z!z#ALq<4`ksy@-dOHAEtoK3b7ga_S-g-5OQDewe$#URG>orLHV6EhNZ{Zdh89*HbJ zJvD1A^wPilr1W3uepnWuYSoR(PGx4e^hN0tCHBI;Mqm83|MmNLOW?&I4HW?r! z*QAhagB;@A+{1l%S!mZ&Az4*CHeLOy8iROB7#pSYL0Vm172wy3!cA2NLU={q9^PRT zLlsUCmE-SN4_1C8U%Z4U4+tux7_qk}&(R3=$kTFc(Br>fzMGWroGGc;d`LlM{>|wv z;d_NNMQ9N|$#^OXa;g&IA=5P%#!NJ`jTV*ot`-Yl6@)NX-NyA_`#(Jyi3t<8CG%>u zml~*xQ=UvEb|M-e&VFHKH!eOe5;)JAugaMJb z292E9l7gaw5(-%G9~q6}BHh#8InT%KGExm_N+aAm#bRY(BkfvZ`_g-uxr-?ebdKLS zm9%)lSDy{vYHk|oQ_bq0s+59Y4a&8JC-cZ^O)kR+1{Ud6lI&Ky?bm&fQf|qa?ClfE zLgVdOxwO+F+ee36ecMLOVz6(dt_I<5y`;uG;EOKvMB)4+D#X-k=c6QN+;@y#pzboK zzN+q%jg|5AYDg-tPtqFyaS{OyGYu>8*&U6BzZT;A?r~YH3;47w1(+GeHEgq|5k2>3 zQQ3*|m3k5Sv;~T_%(g#y*O}(7az7QUB`2Mex+a(OV4EizT4Oo;4RNAQOo0WqEuTG?4>b$iBea@Mfu#Z2p0hnA>I-H}vyiyJR>*JY{<@>3l!_ z+2Gr|19YLoPBPsVJ|v#6h8Fl(HoP*t7I@n%r1G~-Ru-#m;P73Y5Ou>#t7jtJRUuc@194VsMamb*G@9oKS>vz6f^nmu2Is* z>DaC81A`E6f3srja9C0vpQP%~Wg}OZ`|6Ld-~u%wt`EQun`bR($Y+$pMYr!9_k+Ki z3}n0`a2Rxkh;Jk2-+9--ouJ~o#-}4;{mnY}g`Qb*!eUYv;H3?={L!^EJBD79vE(5a zn2cUs*w79vnnvWaL0Tqg_S=JY1*FF&1@I3CFU;FK8G2V3r?>I0pOeFySYNbz%K5=_ z3>UwxTyWg`n?mj0aa>PSvt}C(F}H2wlX>k}eU7%mvLcwq+=bx>8H~rRkZlmBY(1GNCqwV%E>6@ zsQzT(c)B;!tr{#=ZgJG%WMjXg6>mT%6sp)yadbytG+(7yM@KBPYZ<#^shs3N=7TKdQ! zCZi7(_|fs(oGD!WQMtL`YsE(Uv^O?N(fvVvAMKkH6W34_Z+ z>7k@t@MK47u7xKEN(C{$_BEngUW)Gt1ew{OB6K*)5k#r4{oYCBphFA=)PVeuczi;H zTwsr$+ZOZj?tpZD{j(0w}l7)#Q zNF5XLjm2hO+Mv|sVQI&uw7DFt8^aC>C0#OmtyBCNZWWH2f5G)Hb)gvh56Aq=Cx43-zqZ?-4C=!5CR}3RmX3ML znkBIl0lCh9GzR?_iC71KVl|yXZ zt<5QnSOLdB*;r%38pgU(qf2p5-?;usTGDA_a&ZqE_xFM2z+d2Sf%Uc&lwfOQwm;iD zFb2#gE}PXHt|Fh~=kXoFq;oL_UZAP-IJ*Q+!B{KB16G%1*nv~GNL<*ZK6_dt0h&yn z{D6v>&=CboeqKpC-HE}*r;>jbsj6A3^WgMw zOMKHa*{xLAnwssBIx~I(OR?JiAij#=*P%mA&6Pl)`sLNtDK9F13;NP{Ubs#my0^2KO`*1F~Oq*-@vyTbW$sCs`K z+rzpa@FQmLk7;7ifOnSw9K5x#PN}czc!lc6*Q<6W#!8Tf)Q3m4oGHy61U`GEFVQZK zn#BsFrwRdM83q4$x8);$8nI(xVKKNev$I$2c!?PXA2dq-!L#23a=JB#<@d{|55tXN zD|>aU*wBxk?Y%Q!`zwIHD-$-8U9V^x$_{Tf@QX7`v zowRo==nHlBQ8z4y#2QX6-MB$9M{=89_z&QoqWQ(7gid`|f3LMb=XymLQ3{vD-=7+Z zyHfHjyM7r0=}r^%Ak7qr`2!fggd4`Dd7Wvga!8?$-M9pP4klFEzVwFmd{gTN<_PU? z5c(zHK5ArO>`B2y5zm$0dc^_2cI;;g;Xm&H2E&7Y-}zr*@R4SN)KLs&X0?Q%v#_v4 zSvJ`!D)5$?CjKFHiDE2I85y-@Hj;GJoDO!6d&KbF$873ymv03XN$BW&z;^{utF-v_ zF|0lB*ed$+%}^{gWwnU9-(+yq7=-WG0qIugz|QO z4$cVF)r|vP3IYi|EaxG$)K>q?gWSKdR;^g7ZL9pSBuO_p^y)D#+%P8+U=^E8o2xSLAe0cT;qTBVN;K^Q_NjamU-o77 z#(z9O^d7M5AAOv_`Ukm3sRE*TznX&KijPsc-0u-!q`&iPdx#R3C^1C$(Gw7)QKh}i zf1!#ju0+nedmm|jh;6>Tde)u{^L>Pf7A0RXXN2gPcxQJvEd8Fau&%dP@2&men)K!G z5W~IF@c>)Vdc_?v(Uo%rb>rQS!;027JS#z?{9N8M?^>Q4aTvNEtVBht_creoKDRPn zg=<%M9NHr5XJ+o!oghp0o84}EHr;n$^OMWT$dGsooBZMs&8}1SLNoaceR!7`(xF@% z8b!%#nILy|y9+24=r|akV`kIxB>qCx{pWbn^dF<5pEWRNJrKl20ZE?Zmp{k&>FJ%} zA=6(XN7SQR1X7f3SS1L!@B8|x8tTw&0fKH$);{@*=( zpXIY7S#c3~pr#|%9q0Y5(6NGA`1+LUHh2S# z%2S}ukhTs;Gb$OEzgZ+f9;d)8TUr(Tj%VE4^($O9_$=njQ)j%}4YwZF&LPZBK>>Jq zH`lm3(wu(46Qk179%?1F5dJBW@_TR#P+K`~??#|#Mw)qdd*X02GF8ZpIZ~>Kj|0fV ze&OE!m?@4UQSU6QFf=rDCMAy8oO}(6CPSKI3l+Dy`|K`!C2`kW4omYmRh>5JUgg*y zA?@$jLGECC9Qpd4D;cXleX%BWkPcvrf%&<6{w5F zJSNpJpKnS<(nHeRYV=*vF)r}s+>M6wlo2;6?FFN9oMOx!-}gK3%;p?Iz(@&*vr9yjH3o*+6s3ArIVF|K6@(%19>%d%<} zupdy3)NX(dwk-_XLusrGU+-tWJx}DX*k>i@Nw(sD>IY|qN_&_Q7*+vO{mfjbVa;xR+$yr zhcYQ`DH!-1DAyT{xNt4RTx6Z*=H+!1CstvOij2Nf?%x1Owc{t!9( zJk!;P`VV|=-KJZ6X2eBsWnpBo?H*_q8%8CBWS*p`y4d~6_Z3cD8@RLjn#^L$T3mLo;U#{RlB$->5_^g|gm z4V3jftMzIitWpgvkf+)FlRlNu2CP43r9hjj&29p;ab6Qze~jJP(-X7Pq|fH=?tW3( zbkRk{wV9Ini|3WZ@kDvrnqkjgd^!Hn+`;0HEf17DPC#hFH`(Z{f5=f4$4bN_jpFVkcJ>RCnRVe==WrnVzBtG}8TN zOLwk_;sZL!zf_fL@LCH_bbd_7WFkz8iH!QN?}%jj?L6775%oRa7oHr_NAZxNxa?fk4u%?G9IfCd2AV0@S>U zlOFhWZ?w@=T;lf<{RUi63`^T@cgxTCRz|*DMa|t6lm}IY6_uGUHxX-eX@6z0FRZ4~ zh=3WoXm3|s`65?pzm14)E%MHm1Pkg4N*zkjbpcS$x~b@j`WTeNru%pYxI>an&5U&= zEl~HQwE+VoMzZtDfxF@m5-H%$t167#t1GA-{g`h60Yfh4$tx#|b%BjFYDD8=>VADEYVz|9kw;Jaq zi3An-_A^CAMiP&6p=wyWNC5-OpmHV&FaoPh3dk?d()K)~!}->jU0(&O$@l8JDR=*f z>LE-xW$ypw$_)}mnG`>xl4KViY;#DW{XW%OETH1ym*rR|nA=e2IP$yP_m|@qR4XCr zr#dvGma90*8kL^T3D|~tROKP3#O+imu-~FMhBYHPZv8HeP>zhpn>F69Ex_`e)=fA; zS)MakaNR00n41Ymr~X`uo0oxM+a&ASZzRo;4&+yvmt~i=%u4B?>&9isWnf=gJaoP- z&qxOLh5`Z@o#Nddh$(s+Uoy8edJmMCm18B~wC0h9>sRaQRrgYm6$$zZ&aqSm9fdiOP)zw!f46Zai;Gf;+Be?Qd zuZTt(@P8xMIHZqJSNj00OIuf{~;IAiDqU0hbYN-;#aJwJygFHyuX!HUGe(#p~ZYb z_2#^rgr?U)81jpdiBlQgc23yP>%PYBLVIpon z*5|B1GYr*a-D@p}f$Y~*3QDTcq78>#5mbDWj(45(s4d3JtBYSCVn;-4YG#r7Hp2AJ zF({c{Vn+&slY1EyFH+pX%%PUmGhl&*OP7=A%S;jJjC==b6JntiOh(`}{n zi`kXr=koy~o&z5*Oy4LKE^GFeJGppPo(LCc6sUE0X`{-#O#m&zJRSHMB^@O+u*Ql{ zG?Qwr@1Ltc84SG~j)-Em#J4^+`X8I7MR1zT9#Sp);>8^K)3C5yRODc?pK4X0#54ou zojJ9){`{!LS_a7zM7`3CjZNjJ2|g1DPyResGWj~gfMDSSa>!L4NqniIk5WF4;4g+Ng1Ae}oDJ1(ZID2e4-Ej-f7!vawNGuMvn-w-#}zg8w_k z`tK(HUj&y#efNgk!4?00!3Cn}-&Yz_$bKW-PaboB|M~+#_Qi;MYmvLk(rBopI|bIv zN&9QA8Z49Fj>T@twN~B6X~f7MbND}~Me7%#xWLo||3N5h0HHM4sP_JYP^y1*Zt`=+ z{(~ADeo^DUo1j?j57zAe1=s%*>hd!t{qK~l_4oWn+%KARq7U>>;5+x~MxDYz)EDSIt-7bp&4DO+Qz3$ zCDKf*wj3|Bxii*hvZ)d26c?%7p{$(58{o3F9&!`78fRcw~0d96&4 z7Z_2AvG*tvDQ=0G$q)?qhQ|Fx3^f0Q*SEhTr|atcY&!jjfng=+gF&Q|CS1CGR9Kdh z-{$FH>-{-#9Y$huI&2)xO~t$Z=nlW?cYc#*3qE=~t)XgMi5}gssS0bQP`sf_a3lAk zGITUQf7b5!{c)6fU7kj%nw*%Scx}(?$sk{H%%S07`rflDoB92+^>zNU(LxiY_s{_1 zuoxS3%2-wtnFmD)E65$vbVxn#@a9pCEGJC+agzHgqTFgyA^z;BZFHnpRs zNJR?0=?+B|D_=ujxxQ9Abkb{IonmRSJq?!A5-gYDwix;1*OSVv+B99G<|z|U;eARu z*<|OG*j_=5#)(&D>5;{*U!iyX`t`HX?Kf(=$cl&NsyC@Yk5&)%E)IAtSGy=$u{%BN zRO0}nPZ6*!tgkV2WAgJ|e9;!uuX@5~H)R?4PNTPkm1CMGew8D+t8wy%@=oy1`LV!_ z`hKVs&eR-`xecg5OuKOT^u5|U_H-rRav8{#V;VHVs6P+f;qPb^46&B@*7;z!TBgW! zs9yiZpxuny)(l6;st@HCZTOEbR{bhNWe#2iYPl5v-~v4nosP#B4YWB@keZ{E-OEuk6NS}Pr^zUy!K6H&7OJa$_o8}6>h6VyV-vMaYf zXNaoQ*d8l1UAzi&-tmfRDu}2>^zyK`^b`ZYM)T*wCq#!mbLDcBIA|)(z9tc zkP6bz^d2|2Ita0K|3F1Rh394dfnmkV0+-SAJ`uBz*3y5PGx(VY9Zy@DIv z_SQs$S*Zpe{~3t8{wIvnhL`tC2LESDOnF}ZsU+t){8`RN&_~=`cv+>CK>`K3I37!l zi-BQ?ABW58pw5Kxx#(kB&YD(ZKHCZQbWZ(yKwUmz&yVJrB0H_=qF3d0k)JL4O8Ivt z8Hu<|hUEBs@Po`Vmi&X2wvo-eG%*V`hC6zKP&()o?DC5 ztAF(JDXrkx%zzMoUs!JnVf}V1p=sjFZDW78hOSL8G;*)U!&X}_1n&ytrtsXSN4hO|7a4F+~41qskKTH^eVx3 zHTRXpc$$kPhMh7c7hdXm?oDU+v)B5fG^#XUu>T1VGCwGdq$wEd`4Y<~B z-U7Yvz~-q!ckb8t4{5{c1=<4=hh;Doa?%1WzudYgyR)eT8b!04!p|AQk{NaR>_p*$ zB@FXkm3^2XF+!2EvXymSN0T+d{6vX%Q<(dSJlD{0u5M|c>qT5S?ejZ=%V;wpr2G10>pk^`++Ve+?7uEQQ?Ren2sGJ55- zdSkixD`XPA#$sG&yAG*hF;M0Q#G8$QUYUw{Pg05$z8wSd#t(orud@b?V#0n{A9)^FOS7zq)-g`??vi+6gryfFCa7_Pp@NnR?v{ zoy|O^=0U^7zivL{IA-^z5r3%o;zYM$n|pXVHw{`~pM`f#8R;E2iOev5-o-P&8E;@p zy$_@#X}9($->eB|1ahc`Cmv~dpqpPaY0q2|PvVn%$OV>lUK=oYoi*K_L_^u&wpHF< zbS(HF@ZDYPe=O@eP(-S>l`b>&905o1kn0@Hos zAuw3lcO@I)swz?8D*hlw(b^h!q`Jo`hGe~WU(YcDw!EpHYiUB&F_Hw^d0i zxpRjE&{iEw$oM3l(Ntb3k>%x(B6xBn6OHoGV5hiCf*)c6_wLD4Qv=g%?+u!Sl;qx| zJaUE4LcBH^j{}o1na;-y0!uL&sfT`I;-nl31j0bbT>TsqJmH2*cNZymkYX}%*lB9U;jasz5t`9*yv866#Zysn$Jod?wVwMjN(&!IKD5Xi zmr)&lZQ6#_D($hy&i@sg+LA1#3x+pm*nL;ziL8f~d7;;*lKMtydlY!B#(K&W0QYWD z7G&fsMFWzcqh|h=ol@e)ll5l}ef@rdTP9>Gg|9)}P_q%dH*0@QNlE$e47jy4S`PFc z-<;ZF#6_%pc+TgswU|BKY9sFP@y0JfYm}|Va4BfzMf|8_&%a`e#Cg@|J-p)(%<+<+ zJH6eB^SmEh3_h&|)L+Cr%;0|mk}A@N)!I&)Hgk+qt+Zscdk=#RksQ$1CcG_~3hD0` zbAV*cu&{ARLj=K!ze?zZh^DFVr6fnQ9g3{FH5~O5;^=w@RjRG}DHa>emW< z0a{JOrku>a39E0an^y7ri)G5$^^7Oe(E1gKq_=pvXdjMq;V?2yl#N#Pm$#3WCs%oH zjFlYjgUS(q_$kJ>G#+O@bm#8wt!df%6xDSEpqU-MIrlbCrL<;=h_vvVH-ev|oA(}P00 zUg~=n%pTvu+g0x z#P6FN3qc>2xT)KP0T@6`E`R9;oF%F}`@$W3SMP1-x`^YO>msc6{Vl^Midx)u z-ICn!+^j7v1NSHwk2RmpP93T)RBHw<{U?bJWU5tnj+XH_EHe|OB-FpmpG~G72lxhJ zq6;x5WD=cjrr@9S*7XGtr#O3uSVWsm39`Xu^)aYGOP@$HEbVZpHFqbO*mAP*9y{~3 z%E0GmzqzM(_?_ceYj*cw<_U?nKjL=JC-6T$>Hg`?viU>GiFNIaD#;L1vbD7*ga#>= zf^o0aZV6=;5X=f#z1bT;PaSdo+OG6A#B$!J{LD)Du55a7*4LY0KmC`C8lzkS7AG;2 zt6pwU3&tm-zJ3Vlf&8y)VU2lG-yK~EH#;0A_!^phRLe& z2to5-XNhv1BnPXjQgOA|j$ex)NFOkg%^Z?bH={zP@$F`VZ&o! zn^Is%B1~*+8J5eoCH5WCQ>J=6$T#SOU90fPX+y}%c-Q0ge61hb`I*JgiRxd`AF&CK z%c-Q!uAen-KortDhAUOIDEJ!RJl5`&W8xjHGvo;qcWzS0Ijr2N_R32Vl&E4)Vr;aU z>c=cU+cFd5_uGObOr+|curV-YyJi$A6)9AsoTYB`$-3vgz0wZ$_cRGkcmlD~`}mg1 zeYjK|1fSFRtlxw3XOw!1c%BiJ#av`r8i9Fjda9{a8{`$E{pTF-0 z7Th62=$Wa70;FhwP9aBuX^Z`lWcSc2M)`EFvRiN_gGowp7cv`!!jGnhnG6~Gv&&Rt->FGSmsXF7GP@lv1aX%=SFH55q z%K_i28u!O0nM%r3NaC|&7o@PT7wo$6X6?cROT@VWTl>E9in}Jy{64$V4Oan3f%&Om zd);DeMj(|=q|l{XF7fR~G0?)ScbdGhTylD0Lc{x}@qEyL4(OYY1R8l_PPX*EX@#Qy z>XMaHkf0r&ZZuIpbxxdJwcvfus#OD)TkU!A)OG;Md@I;G`l;_Chr)z88O3F=jFpin zVv)lnwoEo`-F56-boE&n{V#3kSAkWyVyxr^3)0*Ayt{Owe1T`)E%xDa@I#`aMGhb_ zdLAu-N^&j)VUcjGIkernkMT#5M(NRMN)pm&Coj{IeYmQrx{5Mn0=dg~u0{Y@uP$9i zkAaAA1ihWazwi7nFc^|5HT@fG5F{ceR zew5pr?Ay1P33FScjr~oMXqY6=BubxM)?7#u{?D3=KXc434WYiTX8FfqG|!I7{Umb2#$*ptbZx5 z{|>&=9Rmdm^oGgLUR{C-l^ek%!j+;E4fXnu0^0><+?%xSB|O$I41u{+K0vM!A};0X zT61=i#I~GoBiw7=fX zz?7flGylA3tfL27X?(and*M=7C+K*9)L-%0VL$Se*&o1&ke8J7!-P~i0P~ZoUX~a0 z9b@G!KIn@~wxwX;wH$dAal{ehvf=%4qZIs1v*hT$J%8LEU6@nnpBp-wz^7-^ zb4dHQJT_y}B~3qT_eY!}PRDUL&%PLCr_4CL++BB^FV#Cw1gQIN&3tj9({h(mNzI0C zjpe%4BMvQieihbQf1p-vc?*ac zy~j_2Z#q_NPBcAO7JTtLtw^Nrx49{x&*T>8l^QC`0kj(6%j{pF=RQ$_fKi30Wyb)w z;$6|@p05{&kejrtz#JQ3sF9+#catZ4@g{U6FBvIUYq8Lx&Hu@#Q^dvNi^a|yc<8gb z9dxuhea2%PgYU@Rv02P8XZ;#3vV#q7_%8_dM#Xc9^O2q zZwub)M;u%yeL^u{_E8iP3gu=H71b}{@}s>hC6@#8W=rx|f>-0=f}SqSmhVhH5O&#q zdOuLpMo014GY7Mods03GzRpnBsGQu~<3R7t$;x&(Bg@6$P3ITEu#1JS{tWxY9aN%+ zi1eE7xL)1m0;aUcdE`7kViyhLG`=G*qin!O4AST)jJXQ?R>|4vGJY{=yX~VWeyUUC zYBL>TQUjIwnS-~ZToD3H=Wx>8`_u3v(e|q@C2#3ZlO;g#3murw6J4vy$y*-@Qro45 zf$om!MF#bJPv!u|PXkR}!bpWa(H-lCO-$@Fs_^}9dCJL~6g&%#^a>E>!R4Fj(7C!= z`1aU5Fq7h7bF}qgUwUne_V?do9Iv4h14*Nxgm4TL2Ls(K3EWmCy~b;0_Yeo<J6b;tNoiMv0pCI{zoT@q_i*;ihn)ZMo^k&vBp+sk=1-Ii9wDu0R_jV6yZhY}u6i zFbJ0I6ZOGXrn;w zcKC7iKtCmRq7 z{bkWWu(gTrlUnbxJggcw72SBTp6uBAX~VdMSmn~>{64Oe=keijdaz{fxCoVrGfRg69HER0TnuZTPv zw;A$zDLz?YQlxiLRP=c3+hscjE4#!%_BZ3ZsC+!GwAKPJV?ZIf=JN_L=&YI|!fQkS zv@kl@mK*kR?SW;#*dcqvc6I!G&~~TPa*YqtyKc46Dpwnp2h2k77#e+&krbUwx=&m| zNyowgH7@i)Zip*QZyNf{`@2n_K!*+bXdKg0KaMs>V*L@q;S#=rqT$yfq^?NE9QR4u z3lXg!yyF*BeP(~TK5-b3f?d2p=$*F3eU(y)_Q!!4A4Ax(9Jh$3Gj zJ(!DMvsS1Bfs@c=cn$uWwQSo{S6cJ*JL-`l6Xd`3jb8^2@n7=CNQ)_{KnWzS&mtpY!DL*#H+H#)LZBP19D~(#eG?$IB?$@fKoAXNPaA0ne zExg(6vKdPLR|#F#kH1jnoCFyb9NRG6I19~}kWrN-vXUDTsKP9?ow~P$Y|pf&>K#k|f6lBuEAY1jz=XM9DN6C5e)A&WI$5O=^Ou zKM8**&|z`OE2U-m6ztujO3_SK~;6U{VYCov_$?UtA@`&{FW?6#W8^UQ#9G3ro|v{UxQ<&eay6n0@P1e5><2GiT{b3hz@ zEx_8KM`h!DGrcU2R!RN)yC(ofPGa=_Whd{i65QYJ@qTQO8Q1Z_{MCKH~`957RzrcbFsvnu2iZike3X0gv z2JLP3m`9G00+u4=!R|RG;9ZHg2r*@z-)KXW*(3dpU(3snw@dxzZEG1*Rm^&y%e?Te z@h`)wV0{=OPH=AV00*oL`qY^$JUpd&_Kinwkq0fmkspAhi|zkUw)8*QQq;u#e}yd- z_qn28q$^X+BO=Yda;Vu_pv&@5UYe6Q;G;PBQ8i*|QqneW;r1@+-ukfK@<0LAkPC{C zREu%6-3#E()K;#r7s*XZ6sk0M*8HftAD3WRE@qvI4kYrq*=&N6N9azzi1}dQi?Ij! zN$n%KcNbXc5<(N=dCZyCgenND)=cw1M7Q=b>6C2)YSwIW+ZSgyc@kL#g{Xmn*|X9W z*r`){7RV}W{iAY(t6x2%)>4#0E@vJ|K(aW%Ahs5<(=bQf*Ss3Hw7KKo7Fv4QcU+@4 z>TS1B)&ON!Ss9NRtTqgYyQ}tEu+(~%l2R&Iecbe6A)p+L*-L`8K+K0w046;*izF8p z_kFjn_qS6qZn3?(3kYl8b_l`c5RZ(|>8yAmFW*K&2EDtGzZe*t=k$Tc3YMDgFJ0?d zrg)cK9G_6}G|w78RNx}NfjF=M&^>G)|ITeV8WS`(zT+nyEO;anZ13;^ERa9%}Z438!-kmg7YFq`tsEgYe~c4A?z8%jZ=hk{RLF=Z&+0*^c3LR>;; z4x7=pekZ}A4m@A49gIz$-33mFK-`X?CQX~TJ3qGI zrQ5`*SO2me*!~(7U+S3Wfydj9gwhrq`a}^-1D&$h33`YsdD-X(eJ-PJ@qD=6m!T8$y{u-neJEV#8vSmP$=Bo$Zmg1(XT^N^y^?QT(cb_sMVK__|wvX&+gHY>^k!E#}70=C%j-Au z>M71>26VvA9?ab?Mw0P@Otqa%?*NmUcr7aX23Ry+CBka>CttN*zoXr_+F5c*`x0o; zCgDaw{0RIiY-V=KyQ$>Ggt6IZMXuZa9M2eKmVCK<-)B8;$}fF0`C5k%3*GvA(CKdz zGCS-#wO@c?6SAH`Tu&7V|kck|lLZazW6peNzs5o^e;gW{RJ z6P`m)Zj0pSoD#Dsbi#`DXMIQlG)I2)RKsV4|9qF8w|>jqA>}r?m%h z*CXcYBsri%$8_$VpWW)Mj`ioQjy3A^_o|P-fM0&LtQDu_$9|dq%3=%5xlB*Mz9^lC zgOQlIV|AU^8)H9Hvb~HkK-8Qz-7Ltj1g2}Ka^e^bC0_3-|JYf#xu3NsVT&9}%Fg+A zn>2rXMgN0&l@fBk1jxfZY(!)~d(EEB+cVvfmNsWDKATxJlFWNT9=G4Wp_(g?y_D3r znNYpkT~2;RaWii3#KN^0mTRsvTx_3ax9@1oBxk8r<@OPGyn|;K_G);kmV$D~GJGOC znF7B@hz8`Qc$GZ(3;EaTBJg6B`E$e5ao=oFyj7us3>;nu*U5*VtaovEQjqT0?Fjlf zyp_63?BOjhrS-e z3@ape2&K!OK)9Wb)+xYZQ-O5dvL-(fca4*B4VaUaB_`ey(N96-xOQJ7mW$)s$SY7x z!e4=PCe?b8;NS@MiLrLr(-q6)H66zQoTx6bi3p~eW_zceJC9Fr^yue0U3WBRc!7|| zcF`@{fjk5L;oEFSrvpOLDFIQ^c+piG$;MUcOW}PvomX%f* zqflIE&>NB)XZ} zL%#0J@jbAnw-B;0YZm6yGcdG$CNnogybV#9Ym1sa^?x+7v)EHykpB$w3{JhD{K0ca zuc)n)u%V4CPp_W7dc$dYk}lno9&1~bmYrR{A}**ON2v=^-@=$+kWU$}PYq*v$Z^oF zxoP!MZ_Oo5?h*!UCGW$Vz9>_VOs!vYjnkt0*w!F@%AihL6l{P$A?F}DO>S0+ja3U;+ zQu0}jymr!i0j^Q!1d%Pr;w_|{9S+Wl-SZ8SGSoJq+D>o0rbKaUHdz%_5P~C&nBLFP zs)u<6TAj@`AKGRu@uo}qzx)9F?+~I#;Ejkjzs}BSjDF(!1KONAn3|NUlWCt_pFZzy zmlzcjCl&hulZc4!^z^jUCDIzDP>D5=nucidA&2R+;l0%!IBh&)1U8>kU9RL1Cgus3 zKzi72n|jHFWPy}RToqkY`-xzbk6ba`cfPA5J^BWb3mq|ynh0J{Kv&o$BoQp}D0$H9 zna0xi&TEBQmyI)1pIb-C!LgNC7-(}<>I%rkK-Z$M~)m9p%< z5yM2MGIA8tXVW`XhM4bu4m$1Xl>Aw_?MGY-bx3`-)w6oJ3d@sbd*LGH6pzESljc)E z&U3aUEW_3+$u6_YWdU||MXZ(=5XiTrd@x#Vl(2NFH;(m?qOFuelsV+1hbkep2ou@; zNR_1u=o*dMI$u$Hx$lmbxG32(sD!`iKzS1bfOtfGj#}kdDpyhoQj8eOi~<+uN?zi5 zo*v=3{NQbt@uD}f$6v9Xi*kQmdE!!A)yM#x*r+|G*!wEg>*+XzrYlw5bTV+tA-!!g zWKP>G2aAx7*i-Hm+rR$8&&WApFZK?JG7Q3>bqdpwSU~|E&I|?CKfn4~K5oW!7H}yG zHi>)w!NhiX#e*J$tHA~$v*vWs$zW_3YB+!mbn)AV?%l$*F_A5?Sl^-4e2k9%6QEKQMy)(QI z6kA{H>p$hK`MH~&rBz52)$f)NIGb|3XTdReHC%2ws4E%^xLkr4IlhF2h4D?L?cBhp zvQ4Uz6=F|u2lbwQt~P5jKAf5e+-_J5E3t32t(uvyfwB*u-5vPLo13%;PXwAeSa=9z zQIvp05E7%H#W!4g24fKqty%Ig2$jJ6_>4hz<%kVk5a|7*8^ujFds%i*iXIZPsc2Na=O%Av z_P(Mw)6t&VL_1V8G25n|rs@*N4U(u;S|{%e?|ad({&{y0*zCk^7Dak}=vsNw3!ovd zV%Ri)fFF@@L@!H-W&mi2dwU!nfQGblX#T9X>pSZ<>rOJ8?5uE#>xJWk{i&f3hF>tR zGqcd_*H-5rmUpeU;_}_*SyRikb*YhCfi&Ugy$P=i4h$@BELx@RXO>JERAZbN{L9gz z!_$u1?C|oqk5<<)$2u|_l0P^2Ne?v0;7(-TeT%^UTXXZ+Uhh;?f3#3Yn-=fq(JC=} z_>JAYiW5~TrU3m1IWok&3^`~x#G9=r^+{W1Q1=Iy#;3jC@nT{**MxP8UcGt+$Sf2X zW7a;kCkP7Mhtc|a;Pfj9Exc8Z z%x(Oro$Z!1tM-$@@+EM%EJY!s*#R`)J8HB(~quko<$R;jFA@O`QD}+ z*i@>0tsQ|sGo9`DZxCQDI)Y4~&XzrubqF3i+eP1vxNJ~19A=&hAb9B!uFHZB5A5DA z*4`Epk#P-Jbc99~S#hXUlu@;1I4E%l?phi#unkrz%g2qaCwsooLoUn?r+RhC^`&N#L)OiFFk-))UXS{Tz3*eBV&N_t)9<`5X#jaF3C_a$9 z^KuQKOxgj;tpnWCe3HxHd~f9Vdo5$NDW* zRA>kq7TxX$G0K7ERc|J=4x3MPHtHCtgds7Lo=8cnsu!`K zv2w=~eY4^xpUCx#HnjbcznoVCj$n+CG_00;i+!|kE7HNSFPTH=utZs6 zS*fq4$u#sw)q%1#4TuJmOuD(-k?|kEDJtDRAFLx+CX`AjFf%2k6Q{h9bzLQPiVF^o z$ptyL?njOun&gN;)q>z!jC3qi(V5H{d%b)8?%XgjVTY}!@PL5FfuhmiQ&NMdHGM)5 zXq@-ig%pzQljn(5ZxxWiYpY4x#wQ|A_u_QE@>$rYRrSa&VWj(uemV03@-3cem%TRL zXt5nobo@-N0vkN;No$(#y1HZ0BMDJCSY$gq$msr-U~9Kk#zf)mb-jgkDveAu!pncX z2!zF@pk=u~&YySm+QWL=IeKMnC7(Tj`>LOzdL52yN%d6svfJn3PcQw6%&f83c2m9I z;dS)TD(9ntE1)Jycm|^ z>cR4}&j1!jF#8o+l1!X-6Pu7mG~DQ(N@Bo`-g=A74ov#*>FKkz)ec;&O8z~efa~8| zoV2HAHD52PkB+@(`H?%O0T6k@tpE7w4w^2QBK1aP?~ctgJ3W^zYVZ-67X}gi7Ot7~ z1W1I#dIG_7P6sIVrsvI@H!QzaYTjB=a8_AC0|IxHcbv<)G_nJ7E{*W7iD(4`S=z3p zho!%}>TxLc_G@LLb+~Hiq*0Q*@0_#Lik zG55}#HI!49kItkIGc46521*|K^t2`Z5?cZ0$iY)u9-ePF!qYP|_qNp24*@y43o1wC z29sg>M%s(CYt+@!B@=Z_&)ra0* zq&=yws=DFdn!hXjyz6_f155ncHJ@Kx4#qxTrUbg@SutTvO*kT__ohf#E!%#Hbv53# zX>;RWa*FiO#I^$p>U9*fSDhzddre~56A~rWOP)QE`)yv+5@RET+ayGMKsCi%1`RPxwIA>I`Jv{o7^YNjU{^!*rhyks_EL zD0K#&SK>h504u@^hqn*U4Iup8m91SL+%j}v#{!jHd-=z5fIz-8{6eZq>nDbDck_^f5R8IHzr@JH3Gb4rkhi8n3eLycWLD0rd!bslI< z0pg1<4#Vp+<#rzJ3G(|_IBO(7_s@}SGxwoh`bRSV`GV$cfhXWiNZ@8_P3S{1%79oR z!~(gs$-VejL$LM40s2k^b4IU$&HPdZ)u#)`|M?6h#_egMdq5G9D)YSTQu)PddGX*h zz)HKx$@KkCxtHfCy6l5v>WzQh5_p$h0k4WEPL{yG*I?o+D2&a!mvG?E8k26oV&Bx9 zdL6wQ{e4w$R6P3u%v<}9_sc5;n41STBd0H#i@qZ*O7sy6h-c%WKe>o!Zx#bd+B9!f zZ(qEo=t09m#j_hsFTegXo-KOg%FY{?e>BcUoA&|==C2x+r(*^P){MvZF}wS-Z=l) z6sjh#+PmRDz97Di+5yvV3I40TByiEq>wp#r^H=jfy5!1Lp!pAbKO|!P(I?JV-?pNcJsldFs0w^qb zoHvko0MobW?%b*>xqO<$ooh+XvV_eWSC)6?W{F#Z#ii0@b8kLXYLn53>XgxJ&3!kr zx~i-Xv{U>h{A#rI!}9UzKUkD9VI*aD(|cWKYncfM~*zolC0D&u2dZTtRpM&3oQbBPCPb7<<>8GSxJy35Qgce;@rW^CIgvAs!0B%jt(R{#CG zRC9|0N3F$v!Q*0|aWJw|6{4@_T32e^6>X-LJ!a|9l}D$k>k_B8=k^S;XMMOFRJ8VH zz1k$U%wj@SLP9E<<3vsW7BZfmp5A#qd}#$Jme{h^WL4@COeM?!1m<+B4^y0%IhP#X zs0hB9erLZnLN_7uTkNyN9hiPq)*U1lZ$`>h9;-fCefL-S*I8~DwGReuB|4sT*LQ_oyC+|{{q6hrRun;4k>D?9Z(pWY zgb`@Q&7cs+Wn=-UftG$&=r>j}+ICW#ZDmZZaN!ww0F;z0ZEtODRfU2b%*F~oj(fB} z(5g0!Gu0a_w_XtnUXXa^9IMNfnzHa3sl8J0Fy#Kzyq4if_dC9HS8KM*t@`DcJA|T@=e!j-;?pW{BvJT_xJSO6p(o^|95bd$!Z>lERt|-F3g`MLW|qjwOmEt7`y=8DzR9y zb=hCTU83HL9<$=xNMW#DQG(>uBt@YnztxpYg4~54Pnghg27pgC!L(MnX>y5r3}P9> zrc)u?8h+2x`n|^q687p{{BvK1iHgk--)zXkGWE~ua-y*Z$&oCY?Dx8tlF9Hq&YG|o zqvcBuRa~DOZy-6=9Nll)9zLhsd{$;!T=sfM16Hw2n=`E}w^2u~ASLy#)2TU$nf!~> z#G8;Z3)mx%#GQyGIg-1QMb%%Q2D7^%8l{$c%QZiX>CrgdSH?k5MJ{igl4RINtd1AA zi%z<4B=JoZJFIBj0*?-E|1h6fJyPJHWEFCot+B3CDX)*_cZU)2bx(g+kaA5hZJ3#D zjo>nznzNNl@pi2GjGJxxm_gX=Cr!}oj}@y5&{Hm$$|dUfXjc9E-C{d0AB~nk>z)`w2o~c?HVn&T(T0>(1aiA6j%V4x!c_lGTJSe*J z9uJQqA(asQ+asdlEYM*GX3v(& zP3rX7m!A1vT6xI2p0q!_GADNAnV`pNrFy>l6UQ}0Tw{STr`3@qR@g#s2*@yH^43|Q zyVdCZtl>6LlVAJ#gsWlG5yAXTy-fWRa9@Ur)8Pvk{uXP7+Cp$TNaY7qRR2i7uX)Jn z2NAatt!W;cUFR|{!_B@99Pp{`X^h@Nr=-QI+;C44Vtc>xQ^L4?JAtEGy9V1_j!UUt zlMc1Z@e3X6wKP&vQZ|}e{$Bj3szJn8&LQa0nAZ=r?%=TCB;gU-?#8#5=Mv7`8A;(D zD(kM#GuMnVutPi$cZ6q67en(_4}ro?E!1k4v?(IBADj{8sx9c6Po!CzP(ItvH`j3J zP3DD}H-o5zoMplLiuIbo^MJmSsSVxkN~BkF&AdpW2;U_l)QqP_Ey|Y+2`@J71NrB} zg%cG_YZJY-(%fSSQ(^wp?Ij1K%_<`zBvz7A<1g{uA!scCr#?YG`+%UmL>ut6NOm=t zzSN{kH`U%tz5R(QyBxsXpPHBRmQ=i#R&t>*Nmo+LUN+FR`uP1Fu2uUYVo=xYMo#7! zUb_ zXW^^$2`Yef2PTqN{0J!L679O8b(Dw#Q;g3H{i#oo8}+IK9Y1ZS5S$1=>bUz7(S&vT zD4@u?X4GG$z=#yexNNB8zWxtj{5A3B5TcjdVPK}Dy;Z}HUE6*`E6HJj^_vmtNy{CW&lbjmCk z2q^fz`0wIB8nlU++SU|tI#PEs;MGRttu{neW0%8P89^FfEpQrUj zbq*S%$!wgQ#f8JwsZa%;D1NA_NWZ1i_-jq*OT|6~kgJ~I+iR~E5XZ6{2FgPxy9-x^ z$LF(ktRfHVPuN35k1MIJW-T!utVcBgI~&`yO#-5eu9#rajw3?A@@NR6WHl)o@V zNh-Iu%?($BINAX`Jxez1ZeOvQ%%So2u;cpG{Y#7BlPKtunMVu)*XW$!9+2B0@Bk=i zP0psXTvP62Jnt%>NoA06pWp)?cdjM1rM|S9GP7R&3hT3G{clc7L!(x9)X7|=38xR- z{Ro0HZ}S`JUrut8y2N{iT&C;=8+djqajk#ca)g{-;&|f-=@$C-Xu~-js*Lywm!n|q zp=MgowF&#WXM_lhl+#SI+*R5jO3HiHoebh@na+1gUKuQ6d6n!jUgu^yU~1w}C<<8# zy(Hw6{f4j*q>9kxZ9%kKi^b!<&}k3&xjzad9Ww~^k;2RN>bsCi^PMMyTAkdeHQE|wVI^dzr=z>C!4osZ z^C(M~&-qi*b9Kg!0leoX?6{1>mHf>?$`YUD7_kTC1V9ndKd~h2C3<+X_kAN0b$%)z zZ@w~#>Ns&J)Uz@L2QPB&Iu;9E4uJpAaOE9$8Jlo)-4KPYmJ&Yc4U|lAoySIM(*O)c zHkVQRh{?jw*Q9Q^L_{%S9kY2to&gcl(=!$}G9e3@%Y%8N(k7K5r5@i-n%1UWH?k36 zo1v9b_|{xT$RuQUDDY`H<0E~bD!^mc*l*vysr%dcg$S@Xh(@jyRXvGj)3NG?m_rJ; zcOeGyfRi$b)hyYe?-?nhTm+9zCNs0(+-NH?#k`&#&ly2BBw%kfFfcF-DI+pldpeZDJUbmU16^-X@>`XxTqZo-s3`2p zOwZFSsDR>&BDY^1TEY1FLQ~^rNT=6CrQhEu?gkgvI3Lm!!pW*_8&V^;`7FmRb_9qt z4P3MIZX>_4blpeh9|G^-%o47ILoiX&s#*k`?T1{JLQF-Iv;8_Aw4l&+d8`OhE4uzv zSbd(YpL81V+1pOK{b<)_TnOe%_B|1@uG7of38?ITXqyoCZ2&ak*vw|TFyR#2syP`B z1U*_IlWr#dN8`>ryL<_x<2?u`j%?wBsSS zR1o%5kLOe7ZZFUAdRSqg`PzF$YSqgk3?qRGya#EZ;45y1n9U5p!GYI%-~`KV?ZNfG zQ?I7*yq~gYj{W5Y_i;SxV0Khhq^e6dgAFX15vE1xDIYJs5^ipK^@=*IYCe5<346-$ z)cq85SmaK3h`WXrl+!3VfIP)jj(vHw3I4(`wd&K*oqH75dtC(QV_SL4Es{h^C48?M z3;ehZ6f+50+`OjLHK8l&ydhb&=a)A(4Tomt#-;LPJ`U(CY7!7lJ_uC9HXE)hVAJu} zJAUQYQ<>w`tP(m5cl%EGy=r+m&2gP%?dKOjS$(wLx}IfaGi-l^wsC;DUw6g+AtRr= zMMs9Co|{{(i+k8fKUoktdl-2NjX7iv@$k}BJC9^DJ)yFt#O#{{Ul%RCcda;6+;;;_ zDPM2Iejfas-IiAozfj-=kCetEZ}lZ$FB@}8t{eNfH_+Q^?m!)~M+IwDQxTzq;mIsK zbTLI-;%dRl8IX3JtYie8F{XG(4CQOF4C}iGugz+uv1%KK+b?)2FZ>_Y;pIpr=6!s2*T>0}5IRaNQ`y2rHRug9Xa(cF}1P zLA!VZmmIR$>njOddVvyuZ8ISsij7P;B5JJahxJWYCZ5~B?0~xNx4ED3%WD)qGul~v zyJyogJDxpMCvcMbFpjY^@eVBSA*1HtPj+y^sCiLsBPbzmu`~g68LCv^`dPQC#Po~g z8PC%^jn3U1Cx%ut)glCtmyoD?ukx+QUG<*9YyF|;7YigqkR zNRr5<7gk0L!DF=ty7G_b+q>(-3v?jzThreyLqFNasq)$IY&FRZWm-Y20U&|&$1>X+ z-lfgx+5TNQz70|($b?7-e7E_VjIKkkpys$*ZXYw$&)!yt$~eRGa4HFEiKpH7ML!DbS^ z!h7Bw?Qwhe<)2LJal7LY*CJ{ua8$o5mQ^DW1^yH5HU>lbw5}a&V36xv<-(S3As{7% z%d}RCJm#^kShV~&R%T&6CS@Iz2eRW?+A1%kaI;-b2U+U7&lm|~SWN1l5kzf?_|+R| zvT3W_SW8~ocx~f`<)ch{zfEN_qM7R~8G+5yq~n$*dkek4c+p@Xd`9Bl4G{UO*vam< z%Jf^{Z<2lQ5Ia~I5ae&tHYRgx>8dgz+w75Fq_Qqaa7>_;d(?{B(m3i)2M9^PGv#lL zojd4B1z6afCb&D+m*Gf;jFM-l+~!$E-~@b_TmI=hi|p5IzOpz@t0%nM@@4hi!8?4& z9E5-r?lReRDcw7Gtkdvw+Wb0~5P1MRYc48Azuh`V(jD*@cq^iuA8qG(C@4pdx6fUh z)y3je!Kct&o88jSqc2#K8M8=R3J=VE7xi4U%dMCLaRW=*#y>91ksE})y-Wl)UO@SK zLW{70ECK_5ny6nn41jCU8Vl1wIRu9cCTBl=Cs5)2?mSP+TXF9j0ZiAKpQITX&7B@% z!@MJ8X!)^xTu@=1aLSFFZRny#Shd6>q)?^1k_zCk2-YjK|4p~b?SKih^|CP5CohcL zp52$eL+plmbgQ@fy@&B{7Z0%c8_ERLUW`o$HXn`NJ>PJ;Om%0Y-gP7pDL4R%{Qk=? z6x+g6^KusbI9P5>Huvvn3{mUS&(Y<{&3%ejEwdq@iB^7cW!Bm|0=L&u77rw{?If(~ zigk=~x1}82zEBetU`r);&>h)w{OvMrfMDWVZxt|RU0BTMripu@{G_3NsTAj*5bI*f zwYFXZ{?ldrKKje2iIoHdAC{anzA*SuH^NF1(TeRrL}zr|>@E-MK#{}L)bGDHyq7xp z^eBleKQLA1O9ksi?8AdhR{mOGK)|Lsm4E;+e1Lyf3wO~e%EJU3NSq{x2hBazzRh%56$bK~dkn}!WcTwFO-rp!>O8$pp@HXktX@45y(MtL<; zr%(>FBfZQ?xuc29$tR7;Uwuxn1koa8;ti9SM=aU_hfYS$&nlA^GKifVwwxk>Hxu|% z@vDn^+V%npFC}b%>dDr+KGf?J=OsRXXRShI!jO_G{d@@jC=heNbe;)w8yASp%2=Fh z+Foo7z#n*{gv@(%0?_L7e}5+)j4~|)J|;B035 z?T7L|Zt>eOPK*+p$AtQRLWf%a_LtA1^xT6-Mt-NZqaFp_a7>h{eJX?)os0DMciy)F z;rJF3%JV{JzvBhJdg%}6?%&^q(g6#Z;LBHx|G35Pn_WSPzmm&-8II(&klIGT6unDrh|$M?x%J3OGC^V zdcSr*z;tT>=Gv7?hBS%uSxw9eR11DQncUV;7t=bqGK8Ehdw+xZrC-}SS@o@HQD%+| ziAQ34UK5C2@=5t7FxzZUcU?u1OmH>-f8(0?m|`7=`Y2XCXB!!AFgqk{Rn{(KDCHky zYW)%Dhp>%ocV=rSG9E=w9}y|u!tAd!ql>F~UXc&~(ouLe8$@Jj9W-RA-F8Cs@yi@g zOmN7m=A=ufmVMY5v3-Y&OZ60VSR}GDix(amN(-cU`%RQuRF&vQi}U?{9gaDO=g;mx z{+paQz|tttr861)#D3?_ost(P45JmhG`UY}vfYm6{DDwCJ7aK4D4k4jTa`msn9xF! zImX=}vhwZ?6}K@Q#R{uYhVM!t!9SEJ|IPTu0BI5>?qg!vt0APqMAugYyLgz`h75^q#An#c^^} z-n_w~d-u#@@+X*zl9B}wL5%wH^GNCzrsrxoVj?lZIk z+EN{xF{MTWO?0c{H0#Jp4p*H*q$r%#oQQ}jIK#;*FgQ3YkG;MzVWQ>j?**{|wa3S1 zr`|_>Bz#@@yj%yS#8ka? zyVobesEftiNAvkO@93pxT0>pcCZKoL0$ABtwf3E1wQE-7im zYAqFie5=y#I{R*S5euMWRuT~(?6PWxtTi9DI()Oo;kez;XQ?9xxZI`Mp(}eoZO047 zU&(|d)cmk0n-gj>F1bWOO>$>zYiqCQW!6>Ill<3(^(`;MMw#EATZ9Knw3$-QRgt3M z9(kCT|K{K{X+Ilsa!b)0!9R?H!uPXcnANmTPxd3Lb}cOYgvmIl?~(Z8nV3J50|2eL z%&MeL<@D(35U)>0+2*8aMF3%2(!1n))! z-n|_5@SkkCAm@wt!EB%xf0Lv#8p)cUU^1S^6)ds>&N=;ec+RmFA35Mvcu>x;fF_o& z7~cJ2y&-BgR%_=cD6(-s;>9Dr-+Owf3d%%@@Jo+JTLD1S6H&-s-d%Watm1SAw!7H- zs?g}XHo_H0jCsVBy&Ln#=j*>X<{M?A_}@hoy?t0iHSNGQ`_Vqf>@yb(650;9f+`#L zaJ({+;@^f1HmV@fLo4?#D)ev9=b<+(D*5}{$M)QR8^Zwabp?EVcIa=^3H_)47u<$6 zpEgD0Z;E6#N?cYp;5wHWzwdRf?u&|``grvB7!_<)OX%+f(kIOMgaQxrPGkP|d91df z2s1714&Mj~4Gl+N&b3Z4zRr3+x!;2{TP%Ku)SZX`FKTnshV+|e~RFBtv`4<#P=b_T|^ z5^{&^+<^3cPSQh7%{R5hff4fDOsxx574*stZ?*Xvx%v!5&HY#u1!bkzg)Z83%#dp5 zym(A?bWs-6n<;hj$&p^xq*fzPJqpXb-4>m0Kc$J0RHnYLd0s(-xCQh4XFdl5KIN?f z=z3HwM3_hIS}!GDdVIR?@XP#QYC!+=k1Imi*qqF^%t>u*el7nTSg1x?1C>FWg7`7C z(AElil8Doup-fUw%SRv4q4A5a8t{O5s!tpCds1I~44ApwD1oHoVdH_D|NQjVwQGSA zVL!32{hzzhW>iCGQ(;6l8^z&gfwopD;EiGsu=w*ha12x&pgXIe;=MS#qWi{M5p@*y zX#)I_7u&$Ee_<-6+78O6OBx~sGcv~jHTbyRdCbz}R2grx*UG6E0a9r1@2zBCmlU^=NOU;P)Z%Cv|{0dxS#%?(T| zElQvw9oxT#%173V8la+Lfz#Wcgci_42hRXLDC8mYpQYz8QKjep{{gqeV?)|A&`TxY z+UmyycH=MTDJpgqK-K6({m-|1@2MLB!NIueq_e-}PJAS2W8Wh1t#RK&m>4h4Fc)X! zWZ>{DBJF&kTzc`vLll0-6dx9WzR%xtTm@KjlJEZB|Nr@|cPg;3kX-LG_ zFPc0O6GmAxv@Af+XyF!LmMe_HU?{YaHZ}9+}_Z5Dbsmt8dc2Le`D!#zj12Hb31Vd|{^@pgJ-_ut)Q81Spqe_IP zfyYE0^?ny+Ux^Xk@D@OV%rw+`i*N2{CNU8a5n15f<-Q6% z%Bqv-7eJyUS8sKN`Q(tp?QUPdqgZ7qF4jG`S3ackmip;ujj*D6q{LO3>yC5g6WLJ% z+!&!iq>6>Yerc>I#5@5hafvj)H?>)F(v6+he9+!fXn));3n;Q{E4P7B=m2N`0A3WW zvTu2=>Bt6wGUu5<;I@dxC&OTq7Y6~Z77a5+VrRA*Nn1N3ng$P=EjU6%j$=FGIguaQ z+dpE$U94+ab*gpY?yeHm-zROyI_FOYq^zJzd-jmhYd{Zf`ijFst2yMe@c*7{x}?so zUj-;PN9rtVfD*jY^3kkju4;&t$zf{l0W21tr3zn9)<{yu^emv7=62(^{Y>2|TV@yK z{rqQ}NE!x#V)gJ&Pgu~j=cK0@WTh#z{qmSsbGmd;*3;68zKW-p1<9P`!TZn={cq;U z!t07v&!H>9u9-&teA_dLDT*rZiEn`i$*M=AM30`QBNkySgDF-Ygvo&1IYRker?Y@3 zj`c~xj-v}T8_<3nkqWDc`;r_r+HhyfL_NUw67Z)}7_J2J6e2{J!0L zIYQTEo!YTTZ%CgC@5Y(&+q*!5bj%5os1mI5!DH%4g3P7sF+>#i0fh@&0z4Pbua|8P zHc1X&HA*DNyk(lGcH$*JlGbbP@%;nOy5;!43C~)0C&Dl9O%oi0SjSY=ca$0_@y*k% zQ?m2ER)-W#(9|Uzm%HbB=lUvm#*Bka2{pB|@a{8`A4{fkg4_8Ma4pseJFQ1Pp`$J%Kdr7uvsszu5cY~^RI1a-?Da1L z@P#K>q(Zn4k<5N&x+UszW_@LWRV^XWqop2Y3A|qE-WUYV0&)xZKpBQLSb5^p z!UcV(D@@#b?|E;U5P*{Sp4b^Zz$;hYfV5y;D~aF%dP{#?Eh78lnu5tkz8}X`>0H&3q@R&u1(fY?tNGq&x$}=?bX_kgjTeJNoRAP%&?lf&U6lsy^uXS zMHXG>-Ko0$U{}y~HUOkz-KOL~yz@1|ozD_xxKfCGjZ`nx9lbHr>Be-1Eu@M`Q=NhH z@QU*@32I<#B|;tjF}VCMc4W7E|6MjqkWc%Z&3d}|i_H?>ovs4dtZDov8?e$MI7wzE z{Pd)Qn#!WrrYW}r zdt}CRCpZYXNiXd5ASNIY{i|F6Y{BIMgE)rw=M`2CS_4m^0-3e~df# z?>bgJmc3&(QkkX)e}xIsOgOP;>dLdOwRp=5L`DsM0a~8{K&#ybyzi`5$TqOnnBZP_ zu*%XarqpkuT->e3pu+3HxMr(Cl{!a@CI&H?d;X`E0($B#p(QvH!ljO;58qy z?6M(elUZo!L9}M7WGVQs_hm~1HEL#Pgi~Z1W_Kt({iHHm{jDA@e0Iz#8`)-uK73vbQ?=>Md&*c%e%Fq?&7fsd=s4|ItTBjfF)m*HpLTQJD&>Nsk@9 z_!q)UWSLc56h{`3| z0JKscv3#3k-E2HzMw;=?U}l8Raw2iET)WM>Ksu>UAt#h0ad!Nw)3iU0aGv3Mx80hp zWWSwMNG>F3(1jhrSAjzH{-;4K`(#{#^~J43btss(W)Xk?%}_|CXQiIo#zZ9b^)9gc zkT9T>yM?T{o%GD%)VjXLx@)LJ`HJx1_~;}5TLn5b%E>O$Q_-kgb{f<#o(k)RgBf@AN;RG_kYg?BB2G@k(1hI2VM)Lf{yIN7B zifuaHu4p|PSH_d&-|Wgq90Q-tURrBMiH=SwVgpc?K!5lM4bYLlUuW83Y{Xu^(9gxP zIA%#^goMjpN2E@4G*Gg2aBF{k)O=yGr;6NOKryB;N!4AGUBB?8(uV3_=4ljGVEhHR!n=yTylN&Cc9Ra_w_HZ>_zwn(FnS=0lQ^b} zIYVNFgOB6qHIF72IBYsew~t3m=7uQM<`bQ_s$yo-lR>k4FCcB=cHV`O@;prGRe`3D#?cV0WYP%etbNXR9qz%YVc;Pgd%n5104+wWa{+18yiGAM1v75?M-lKNjfg#1Z=i*PGGUUM zlz)P0$n7N3xwKv)*++AqzelKu-7z1^j{;~W_-cfM9wNFCwhl9n%2N+tFp$3m+YVS7 zoq^LeYjmtNqF60?fYtJpj7t;|L7jf?|c8g3lAu2_fKYvwxtsPVne04p5*h3VCWBnXg;iHs9+n zyiwQ0E)2lquUW`vFf0E$E^ zYUQs7bv*~Wi7er>w`LGIyWZ z$-H+;ao9>v&r|wpPDft4OP>NTzO9?=Y!nAQ)HLz_DBs#_wzzEICJ^gWNlcK*w76J_ zLa@{2Z*W6Nuk9c{k&vV@JRpu(;?&VtVy;mxZ0Auird0EL5^2-f8m0xW9l7yD;BLRc z=}tPKAm~x?%kPF?{WlCNT`d$p01Q`~-V3gd*@}nlL7Yg#sL!!pU*S+>AENHfq{<~} zq1=4_IYDGq46?s=)R9R!A$rXcDJ3mk<)ZY^(3zb)}K8mYHrqnTp7@eH8a!VUKx5r&A5km(prWi3ys4t6A ziFUr=rd$qXu=6TlGCplI08HUqL-GZs5w!%5SNR(K^80|Sg;F3z-_>|d2x@PiL#_`V zA`I!PlC!FxQ`YIaVi$9R;FH;3^fe1f*?03%i}o=paj~1=zqEIqQB7~l9uZVP1Ph?j z35rM)kxn2W9Ym^h!V#1vAOz_xa21sjREp9C0-_Y9cMwII^xi>w4M?wfI|+Es^}Msz zTkqR@>+*q>_1|rFo0+}mw`VFx|GX>RLJ1ulTzC36D%3)r}BW3TpRjoUbbX)Qx zB7oZCf~8f5?PA0eLuYSAXG*_M^Z+FF!uM0m!!ihq$NEu5CN)&Y}2F>Pk@#oA_wkojy!Nu zBLwL{`dy}ehj72~u0<{JER~Xx<^yS)Gfi|zfxm*cER1MLS=3ud3E(Zwmf^ve!vWm9 zWqNy6=-El0nSwEdDap?qUvKAC0a@IY>vh~4=Qn@xByDs~gu0~t``b_)ddqS7%DG_J z5*hw#=pV0eC>}@_E0bSRP)u8@PaIqW29wZFzTWbyIf?NX_U7?`!*mjW-r{1-|8;Rz z^xvSjf{i3D%0WUaa1RpuS$S5MYKIpT7f#oz042pOt?|?0RS@zd$tqh2MbXKJdV~u< z*eTNULoaWNIRCnV`jxd3XU*zaxVdmBqT|VNCLl#cMac|kC;k|E<;p)sgfh#$zx(5L@q`Kk^JNPdEU_1mQM=L zkzZOG-H1paY>GHy5JwzyWiOL_62QG)8%81y`wrk>*R3w=pUobaVVLA&g$V? zRm)&&Ac;8>T&!A0xu21Jh9*~+Igq&*+I;+loF0Ga8xpCedgJkNH{l;fKa%tae8`c# zaw+0Ju&V#Ss_O3pUMn>Q7~9_!h@r{<1)srTFRsl^ULm2(xo;s8^}=l?@eP47}$3 zTs%HD?sshM5;wOh@L9+%;PaYC*GN>>J8C76XC++NB$RhhtgebQ5CmJeteI?i8sKsG zC)7fzhNH*DKCSSj^YI0F88p3jh+ya7r~_x433Kq6(b+-hXLgsHAdzU)(uCK z6qH*Nb(C%4%tkRlTzhdp>%<0zrTqNfe&Lx1yPE1)_oBx97 ztO z9{`A(y08n_oc=PNR9S#e2Iw=KlP!R|Q86+q4da5}!NK;R$L&MH-y|(cjV3K?Hj_LJ zVT4|`*4mjj_&K0*yVw2a!J3w)8an!G4f3A2VbxU|QS8xeSCoqlzn*}(2h0)9t$~}A zKK5Zaq*wu;arVFi=BolJ&CZ={{QMfn)=FgDofgfG(t@kk+%h4t+(lk5_miPbwtU%8 z1*ZV&m3WUf7P*{bwrJx(-W1_lh7zl#&?D<^o3$mj)ORPIcecH^QkdynI8!tl)vt;| znU@%QDC7JBgO`}|#cH-5io#BAL z5_i-!ogHUpmy|Sl4;)lA=weyT;@q_3JZ9xCxM7a!Gr6v$@Z`HZz2Cvg6_w&Cd@F=m z+%7VYu4Xk&U_N{5i}|Z&rL$6-J-pV}!yug|U;F}+~~m_p;s#W;a<>BRGO-78KR4FqGOaC+l>`DZ5#02va&kb@d0Zp^ZJ`2<(P49 zy6Y<*t_VPGJkv_4gv6{vD@KHX55KdBgr+^e&` zANjIf^>W-Z8u@Xcy%@29l=~R~vUkdUYo-Q1pS@l_-I|7Cyqt9N-u*XkulI48|18kL z)2uInClv0rA~!Rbm3orDOgu{Pfepv?P9-BgdUq>Y1qW zC*BhbVxF?V@7m=?L!s~{V{v5=nPy%iBr5Jql=qfHoP@}>WVA(ED~Ynr4vk z9ylD7oTI#3AM^w1BuFa-Wm0Vzaoc`oqu`&d1pJ>o>2)hQt;6 zp3h%D8?z7Gtcm~e&d$CyTfdm5<1K&W>ct0`+%q~7^~WHqAyAE>}(W)=~Q`v{-$24zTx|}jzu|I1#Uf@>IZ~AmdDMb_gJlnZ(*s=3o zcfaRQgqeVcgvhHDu@5<7ckYCApDa_#k1old9<+1Xf3>u9tLQp<8>(9zgkkT#fyBEUmUXNUuwq) z&b!;b{ZZ?`G;xqv!~(P-I(1H*QS+DW0sm{451fFpwlK=yz5`?pt@wL8)Jqkx*Vqq#wu2m@E8MMDnMjNqL@Tw+Yoe_uMx_2| zqQ+6kGD6~v>4P*x?p*>tOkVIw?um`eM$*%eb7~|TIoh35eS_~jguX+ai0JG^C0S{0 z+#XFhf%CyvSSVYeor<|sHj!q*3~3pq^E!nOCKmd__ZUoGYV%*Ju@cZE?xJEzHa6|V zBs0gg^9CY-XpTvugCQNUKjK3Qm6sAzUcBIt3f;RGh`g(*X()gRFijDzrAjg#NL6y|$#QP`Q3y(|0`8+KB zjvY4Wb+{Wlj~e9VUsmuBk0QL*Ibn*tJBm)0F{ey3DUHjeH-gJUJxVe%q<|kRDDh^n zOy|KQvG|aUKWe!dkn4GE#x~GtxU;zqgP?1O@~@cS%f;GbOcEAi7D;@u7>F>F8*4|7 zrS6=dSy1hoqV_k)j|-@%`!s0AuuFjkC(##_1#~)+>aSU9bn1MOxv-}A{HnZ3lx7x* z25L6Jq)~i>-@<2d;E{by^uU)o(T$Rgf^IR<;ojl>^`T*Viwv)B?0k+?*(I1S5mJ)p zW8;#sS;y}|(C>c#n%}u_gatU9gC7QI&D10(K>^VZue$l?j z3;0gcRdvIs!F(&pN!osngteJjLDBa})zwbpMk9+_5uvlJFjU?^j z0ZoS`5?nw@DPvm9Vy@hx^^pi86iG6uPty=#;&wUCYnjSn!^`FCQ^tIZNX7DHdW$Wo z^4aXdtqA&@vGyjBOX5D2n|o&8=4*0n;@^midwVxfE~EPGyB%!B^z`8+=hl&U`m~im z_&DX3C{{4aIC{g*oELgv`Jl8;-l0P{B&hQa92(~}S(M(xU*iRB+8RpUoKK1VIzTu1 z?fZ;?*&e0jS&R6MSdl+|IOxC87l;q))P?he#BVn8>cqRYk?}t?)sk55$>qj=i}pE# zl^AXK)MfDH?so14ewU`ePt_6dplIyex1K-76_6-t|E{1TM~{6i^s13xQG+C*(Sj!4 zRwfVf)*>A0Cg{F_ePY&kR^UgTE*!<^VjTh>*v>hB!%`i!(}(t##BD7i2Ps2sUzF|Bp zV>TOKWjlMj_L3TB-eN8nT<+*`Zde#w?_Uf$N+ro6?$f|$7YyNGU>sZTkz(2z9DBS@ zf46&3eyuQz*+%vlZrE4#SV*I%_Gfyzrw0b*(!MuSaIGb05(gVgZOalS25eIbWZhI8 zT+g`mrC`~dT~ZPVBEP>{|6zHn*@xGIX6FWGLN{CIB4_CGGz3*$>X!9>J^Oqtaj$Z5XE`rY-83KZVeCPEVFF?t!gg^p2zYa^rFbC4|gis0%e1z~ZdTx#m z|A;HWEY$8i8WqV96MazimfvT8w{X9hery^NG9w~&e`C;npgSax0XOiK9A^}z_*6g7 zS>0B}IPNJNLD6TII+gBj@jl+^1yFHsp%J@C!hQgpje?fDmiSzC+(K?cNbhJ0fDhPh zPpnnX^I{C`b987|3n$;t{n(}8s8LE)4LTbVtLG}k&*f5$%B&wM7_bhU_wxLsTHZZe z*@`Oku6sGP$V^O}_bkexm5TS7mYO(ci1_I1`L!-=InA)>*u3lJTZP1!a_7u$e*8ci zv5@j>euR)hBd9!`PPvAWKV2;iFW1*>NwN?5P{Kyo29E5Z-rw3pp`t4r%X~FVjC^IA zj=M{KDq``oBHoT&^YfeehIS|fM?53^qJY>=UBf$LmuJ9bK=s~+B=EB9u&B~oAG)d) z&be`Wp|E`A^A4trc`l>CE}}Ev2(EdbJUzMF!~=6$fyqlYzjV8gJ~L#{c?eQJV>2bh zcoxSM-H6e|%xO=~ktiuuLd-UMr2LqeAups4mcqM?N`Ecp`t|XiT|~6Sjzx0(+vMD? zKAY;W08H?=Z%@t(SZ9T(sTE;`KMfz*`Fc;+D8^RaeQLqwk(jWcc)?gxqr>Am$f)&uV#l_WH;g#A>M0(FxtSC5`f0?-y^lYWFZoa=@c`&tdmjjokJdmG?H* ztd^geis!z*tBPZ>`qCkAJ6=0IWqc?7LQb|0#}xdA=z5RMhTb(F>G=|DVfuW>aqm|P zH#ODZDne|paKyNCw}>vi;61aX#~+(e^V};2C$DkTcgV7UXd}RPk>^@1*)qE7TMB2{ zN)voc*JXe)67DV+DOt8YYtx&O3Raoue2dcTv(a){@~G%l^+vcyU?oORt6e(;PTqnUF|q zCM+&w=e4+_A3JwyFgfv&i>|(w*3Hp0Q;K47<4>Mk(5v!kYzJbm^8M+}5q~N!5QUbQ zmdhr2h5Y4W{hjQr!93P@{J5=ZCzPj>hC>d95zlsU<2eol3FLMX8mDjISTXJ!?gtD- z3go0Jn&t4rK}skG5R($Y5#?w8nj0iSX@TVC5BK}Mo;Vl4r)cLV}FH~=|F6YkK-cQdl zsTy#6zo~tX{^G3JOxwNq_aSjc+|#XRFWnIQ7$n@0 zMmY^14;tpX9Pgv{ps-aze5|`qW(VIm3t4hayPDX_K;e$=Mh6M-$9sAH;sJlyPK5FJ zGs!iR1u)Zkp!&J;nLou6DW0wt$1P)R9TrkCpvJaq8CaKDr6nln1ehH32L`8|rj$mN z&(94JmVXg(C%L<;xor_7N#ge{KmoRZ(aBe8hfDWx=S+~Fa1Y~3`Tw3jUOu60uia?D zfWRse+K4-&qDi4wp^mc$?8QA6=HX%3(b>6H>^(z`AHHc6v6Y${0wMS^K|wGq??5DW zXeZ=lfNKNY+qY_9WBA!k06TE}hOjQ=&HjhC>68a;q!4HDWQ?NXqD~>d5jI8uEb3MJ zfKjmde%6z0p@WqlMV^~a>&Ha7Kc6s!&f{YXdiiYVF5w=l?_JQch z5o-?6(?A>4VNYQ;qXl0BY59V3d%iF+3o@T##Ibs$k}-67QKUSK@ag zE>(&=nX=QsAfnp$$kD{KH2!`KNiVuilg-q!vh9Y4J7ebyEpT0_Er1yt8+Xt|XeaB? z^g^r*4er(@UefVWR&nurbn^~9a7(JDAW0ymC5xnu*_M*>u?}N&KEzh{6jVn0(^g*|tgBs$ zo^}YC3ZWbxlB0hz9_vP^3b%h4WYJZK}-2XLTW)LZetRE;mHd;)MF*bBMe>KTUv eA^(4Z{9WdjR357~L8%EM@TVlFCYvc^>i1vX3SjmC literal 0 HcmV?d00001 diff --git a/assets/seqera_cloud_execution.png b/assets/seqera_cloud_execution.png new file mode 100644 index 0000000000000000000000000000000000000000..bc733127f510903a76022c36d1c7d30dd0717075 GIT binary patch literal 100952 zcmdRW1yfzi)-CQX2@VPF?he7-Ex5b8dvJFT?(R--cL)v}cXxR^=iGbWSKmE9;JvC| zyH>5PC9`Gr>@mjNVe+zKh;TS?ARr)!65`(!K|ml_K|sK#6L%}b~0N5D~nM}_N8^s$;MHqX6x z!$DtO*HCi7o zyri5-lps}ZwO5KQJWy!}wudDZnBBFPOQk5b@e}3{`ItCowr-xKX>c&QP|p>aB^4hP zma`tWi+%0r+ADla80NSYpB@{`Vw~s`g6vQ~N1+Z(A?OdLJ(>=wZNk)D;B%+Eu{rqdgrU<~4YKen8IHnB|L9&F!4OMQgWfqh!2D*V!a+vgsLVEIJ&0 zFr`(o)Yo5L)e{4;LJqm&0=c&?NkRZvZ!%awJA4q(-4GHankd)A^>hQSq67J`wO^+>rmlJ`mUfNib!Lg3+--p@jc79|ZU@ zy`KMMfH;u~TCO6oiVoYS^nWm4{7)Bx|6?DO{{Pd3{xu2~^QBr^*>1-(mPGGdkK{2! z)v&|hORWBp*vvXhcZPfa-Sc4NC~1sTH`M-O@Hz+-%o}f)v?|~A+GRUe>MRdzD8IMF z5>zWSCvUm6IsH7?pVGqW7@yOVtnR)GU-R_U9ZCHXSfSa3Zd~Qp#f~3ATjokC%2c?92U1 zV|h;(KbAs25-fF4RgY80MPlcT_Cs>SqL&G9MQ&Khe11Hm7IA&D<*zvOs?~eNb!XAq zH<8mW5Bx4YIq2IgR!dd)ai{3jMq6S|hyBa08EHV%7_{|tIGD0S25ake@znGgLdC>jjDKZc5hfZ)|*!dLn4>LMbg8Dr(f zP-Rvz+qn99m*N%5rK%b}PlmZhqZs(~VI5D`qm|lRl0#bD!-mMbU(ko-qFwoD&oP)( zneqoeXyw5)M)YX7%aki=J|gkSc-!vd3#F2OEBW|-rcoCdRih~0wr%<1$x|pArMYmo z_H=UqC}h`P`__qky*EWp*!>~V;kfqYuURgh0m1iC*1h)q*5qipNGcp$z11>0kzQSE zjL~%N5Q82WUI5J8zh`ZLnKTr;z*4^5d>V>WE{mV+MeZ~)!yzfIWcaoYa2e9z7w=5s ze-a=N{ybkJ3GW(T)^Aro=+_8ElE-mw&hg;5KS~)G@_MXjmJ8rpL$DtN5f?=^N*U*w} zeAJ}|Umv9qLT7yo6um37YGo3XV`qdV;A@og$@%c7S!PmTJGn*IG$#om1{BbNDVfU5 zH*K_97F)u-zfEl(wx;NMAEi6?a>bHLeF_T?XS%pg3rH%!EQyc-oCYBwN4V?eKk?cwLq!^Q9rjKU5v$OQZgvK z5}`JklH>a!s5heriEmL5;VV6b#qORc;o4YBV{RJ=<0CIBhn2UdRc{_F;Q6-895d!; z%fI9{OpH>hO|I(ibh>PR91h9Z*BWj*}W08h(UQhv|9S^wsbdIT^3*en%a~sp2WYyqY<+5=I&oW9k)MCZmMKO zXwml_SRoxXnz?RGCvTBX;}pbUH9s63veDz3NZIrvgNZl^c-#VmC&|`1Xk= z3qR$YN~6}Kmgi%(+uGsv@1@s{7zG>K^3{F25Asc0yVj~OjW1P!;S9AqaH(8ci+$cH zC}fxro-Z+Oth-yMe8eM8fCo$%YN$(V!`t@4ZKzhM+6SMPZL7Yzit}>?xO3S;3fBTI z4!>7Bu(vb0T%`vq@F*@~eAPeUknXj0N(Cgd{WZsF{V~#fzy`iqb3n4n3hZfS6TK){{h@O$nYa*j!?p9QlW6dIk zYDLNGOK@?N`=!kF?9B(O5066&h7=6U85c?Mm32I|t8D%)W40ckf03PwE1xQ`LJlz~ zxW~N&5nu#nS|?&dMYH*F+sJd>17I@m!$6E-l1*n*wWC)T^pXyKa$F{?>ZmuF_*ty$ z`=Zhcm^^1I@i86El*mo(z4LndsOm(z0&@)gkwZAo}c^8{?>OZ+2-S1-E1 zSg$lL9vqD%kQ?_LYz`|;@qKx@KT9dP)3V@ro%DGe@qEUOr_&CyN9*hjdRe*4IFTH; z5_sab8Kt09EU3}M+<8C`x1t!QRx2&(W`{<`OJvL+l&I$8bxuD*kKrklSo{5g$w28 z{49~dW*Yx&i_3RPc;v8Jd3~O7F-Fh-Mn~xV+|^*UF63R_i@ZwM{Z{%0#z0^R^>pbI z;V%g@JFtZbC8Yc6gEqj|)BgP4?rOR%;6Yn}Cqr~7ik@2lsyn;D@uLxf&bx;`!oU#4?@`7A**6U^Io-A60aXv<{#-jjOg&6r~i zQi0RUo`XS3FGV+x{d(Xh<96-0h#gC$wZ`CVWTB|wuMjfqb`KM7ImSGph>%L5JA$9z z((i61FMoNAdz|f9?rk-{LL1PALyU~%ed~8rEcB9bzpgbMk63nBF>{mdAL^w{D6)WY z+DgGXiF4iXRl?-V{`qD^BUK>;I3V;@sdT^KK95UXuw;2Wb9F*j|L*m;G33x~k7QA3 zJr?Z|McDFydbHM+u@#B{ws`-HjLkC+C3lH?_j>sDK3ihrcmw{JAr*JiCpFc&gYbF= zogxIT%2}Dwn6MvZGxJ%im}x-taJsb8=6t15t6FLfs&d!Q{Y%K!Q1ai~lr2I~CmNzS zh1xlGM$E$1GwsH?TI&-RUT8=TjCFZ+;mcL1h@lyp&?B{KN zD64}oe$4gtqPC9eK}G{^E<}h#5^;zcdYoRf-Wg9ZFv@oYu-e~RYENqd4CP%Osof~+ zn4GU!+Dqp!7*zV^K7PgU=F+QR#rbwKgBbUvb_K3n_Q~V|H5;^mwcs!$BK$F$F z9Emxv*2SeJ4-1RaATg8ASyQZTrMb%`I-i-=cJ-q3EpMDv5*-F>&BgO`mP0*{NnCUh zCqT7CUB^`)^5&V^wzR&;-SJ=yzxfW0!i*?urzq}FHkKdi<0>Wk`ixccW9j{+$YLu_ zWc#a{t{1gxI4yC*4HoZOoqKr~$**+7nSg<%L>PoGtq%K%7OZ6@=EOzR`oWp(x}GoT zmpp&(>Fkm~Dv&lkE}VPkr)B`5Ct;)!DdxqH`7JeHPk&c7H(1SzmZ_ASDF&=!WP86; zo&f`DR<+_EU-1&3LXioRp$XcipynF7jQfS`@%pvZTA{zKKKX;JsPvW{G@DWixI= zA~MJ=kFSjk*joRDGh12CWb-v=eTCp^H)soO;CMUH@ zxeBU!xO%TA7Zjxel4agDU*THF9y@HT!10&ZuXA!d+r3VSK@f1U{o(keLSEARHI5fG z5X#@aoerZ;>K}t8VxXi}au&z+bRd0X7l@RbfQf`+I%Q_nxzCxf`u~#B8Brv?OPoV+ zy$w=_hr}hXf@xdrOI?eyr_txK9JIof?#5-Ss@`<7*Xp(r?r_8c{UwD}Gyp;6w)N0@ zqvPDSc+u+#iS(PH5K@&ghUYytky1;P;P{~=?O36o6%iuc_OZb(yfL7st|l+uwmY2P z*ZpxC)j|b~`5Q%WjYW7?*>yAL0iAsKdF#)!j;4B ztlMs^4OIElu4dPcW|bojbe`_;&Qi*|ESuAS`#wrsX2XwFEm~_Hy(|g85wI(bvvKDi zy1cZMZW6x}l0iRv89Y(p`}N;H)}l9L{*Eey2?mDO&gkNd zQi#%o_FQkTQ-hK48 z6Shz4N9%?lsJ)rB$a1hNeeI5aWU*M&XbC4ew;#1IKlR)uu;JG0_<3yjU7$4w zd62K@f@5#1S9ntUX4XCFZci!&ko5}*Qwna1 zzwr!MbU!dqq*uLQ|EKfLi^FpK(2^EPibc5uneDaTjxI{wa|3Vp8s)8STR`bP9^W~%H@_D66%ZS3!$lQ7WYMc=BN7A&$hGIo*(bTF+;NGg5>Tp z~J4BiCot)dhX*b{oaq#9Fl66y@RT1 z$g{sEa|ws35P-vlHka*XYO1LckJKYg*v=2_&oFFp9Fl!9>2XcR)7fFDgqEsUAw-}^ z%ar_T^~)BDqqkdJ5|-t;Dbj@%(^TWMjQSqyc(KEnIg1&wzTbhAYAhf5eXMZ0^`?sZ z5zSH`IN*UXC=W`D5zUtwdq2!PrHXDcb=np8D~^u>d$eN=%ctovj=rYZd{bBFmZ+r8 zOeXgo<;Fo=Ui+ZuiJn%<1VdUXt~3Qh%hFKWip)X-EttCMKwgRbStNU8F+=$gq(OUGs#_iR+s!JzcPTcL zM`r9=2+Q$YH)z_uaZT5MaC{Tc^1Xj5h763}lTwWn?EWYmO%NmST~2CRPi63lw`mHm zhx&!|+Q(`+^DXAZBWF?I6$ovLFF+`MEA@xYy^Hc!k*y$>#|z|#qhOzwaN$C0q|V>U z?1n@1G=6`Zhv&ixOi7Oegp}*TaM1NRg-18L#HQU89Vem(eOz|F;~=gWaO4H?yq$Iz zEYQ+A;o6g>C$S95;!x&~f8YMz3YN36oeAZi#C7_;8;I4wO5eoZPx-!A}6 zRASXFC2*5BNn$_2>RrNs2VjW{XJD~=2=O4~;Fcq*^E#g`YLH3e({6seMgikeZtY|D z`n2%m*j*N%iQ~m`T03$6t<@>*i)|NU0n-lH{Z^NgX*@ReMBsPSldGP>lkOOf%(G^w zj^4H=K+-$M=8RL3t8SSS%o$t?-|vh~jg2}>8K3PZafPDlGlZL@388`|Xu~AuiD+!Y z5?%vbYd>*|o9=f|dLZ%p^~QR7Y*zco#D3a0sZww6e?6qd93QwRAr^~V{B=20>vRX! zdG12Mruwbc`4S^#=x_WLjm)n3Y6+u!`G>NvC?q^)EWZza1?s7CED(4*x;}ARG$4)l zxI2MKHuAn%XkLh)<#kpE|0x_OiwQ-?uvc$gF5C8u<)xo%7__O%o$fiH9xqTsLh414 z#X6^(J3IvP3y!($^mYJJM4GzZ;1ic9x-CH7~l&M2Y?WVf! zC+kZKltru(!H`41=$3erR1Ex17c?S)xma$j6L~G4-oR};pr`c!-sHEsT)LU*?^Bl# zWd6AcoleiP5FBE?nt~oP)x18)Le0Cfveeq`H%f9dv!$k3<|L8D7jP0|todO&3Lc!Kl=)7 zD`&TUmEY`h26gnivzeKq`EPW52WIO5L(0Ju{T1r2E5915w?TrN!EY0)SPwHuQ{|-m zM(~wDJv4_6;ni8)9}Rp?C=-m_e*&jz76~;I#9&i97vjlr8s+F?yEuN6qL#6naM4YE z5|CzF$(qMtzTS}aq#YiMYdd~XN8=H{EjHh^07!FqoWs(%qQ(8 z@2z408dp7?F^6<`;pc3Dk}fK|cup~arr$C?;iFj^_~-*(#Gh99%#4q$cNK*f$@&@Bz~L(a@KO_~u=PN^pE5SJT9e zokvF`r7wE5wbrXH+VKlH9NB%jvg)k^y1_q+S$OcNm0M!FP@Vyo=yS?UqNYync@K#? zl7zqJi95ve`BHVjEiqt3UurO_m9{b<=xZ#iHcn4Wug zDbB%(-kuL9Ow~(r)*9&lqMKTLU<*nV!PU>;$nD_Z{OJ+f(MAD-j1*q195LjBlEUa) zV(Fxpm9-qDhF)$0U!0|GT~QYzCL7K`}1w$ zr^HmI)I2%~Qyg1wZI1sx2Z$geJ72<0^8rIz37X5Gj(|`9Xki~R} zGvWJ3Moy3vSSxSO#Q4A8`R6(R&)$oIIX|C@^?zAeEHG45)ObE`kFdz_*dI=K!rL-} zU?VbH*GXdzM?1!?cH05pySz9He%E(~V$h?LgXTcQ*LS|qNW(z@iA=~`W~qOf-;xb$ zG@L5<^XJb7X6J3`LvH0IW~s#}2s9~>@Q~pW@vun@+RyYli8lw6Bkv~Zk34`^LqJ-ZQtf4{iHv$PQBbI#5qg*ywlcVX@DCJU}ktEABhr$BG zlqSV8D~YArew+CUCGGG}?Uc1MfoRpk6Ty4;I@%CkN*O@DOTms;h;s5ox6aAFR5Rb9 z=eO}lx(E(iDYswWoV(xU5 zg!rDteE)`$ni5KDsTpdiQg;!7WY|3((i9^(vTxJP?nLE7D!)E984HBob2ue=!!!({ zZjmII1gRg;s_>?CWqnUMD#1Z=|7kQB6HjDoiNFMp*&K^73TU0#i*wiF+t%vhTkkD8 z^O>SjJ|pxv<)1$V>NEL-J)$zW<><`UI{bkgmgfEW2ClpZwlq$s489Fb>LzBj({Hr+ z&DsmDxJcZ`-qF}E!ty>1@wLn8Dp&CX1(!0AZ=;LozP?|%s(pL=APc^g8oYmt_*^7< zR;E}dZLiVjAkzU@yOJhiVp5b$qWq!}#WbwFwwl{qb8a74OK7fh&V*Gcm6+y=hahpv zyEo3g8RcH+l<7b}f7S|Y<6F*suMx!1gVcmj{#Ewf;i?C9Ns7@oN?lDD@L&==G)3T$ z=}Q4wp;)m{3_jy6g}}~%5RxNOKjXb6mGM9tIj3Nu@=k0xnpSkVSR=kMp-+5e-%o&& zIUdae4>baMQHkm}LP`YO*BEN$$?u#_p8NBhpxu$%d)SV!GeA@U$JO+F zv5a6<+BU+|)ZZshE@-`N4{VsiTcb|c#zGo#J0am$9VzWQ(E_$Npz`zc(cy670YFSH zQP&p_FTNo#uB7=MFY@-vAp8I;7$RO0q&%~SlH$+C-wXD8o14U}Dbq!zgi~ejJwew} zDk(9j6wrqCqR*3?BOAD(VGKsw9i$G2Tl#O^M#*WCwu$&CsoLze>*{1NIb@9|3yI%b z9Y@0eg>`-p;SH zeDBTS~%`9hsi=~AdttzbQp!=Hmd z$TaGVfa`jDUi4DA!p0sRFm}CU>wEH!$Zj-e4(oosrz$sBu38Z{0H1u!>9tEAD|gIR zLaJwf)hF`1%er_xooy*!5z=_Q)v(!UBwZsssYQb!95HwrAwP)I`8v^vMU|ZQ;=F2) z*1Bj4$n{7y*{+u&_&(pe8rH_?Obu0Nk{6CE!>6tga(mFwbw_CWI#J2tN+mrxe!`Z` z(B7j_%ZuFB$J&V129g$J@cVShMIoQBMaAHaxy=$4O2k4Uk7RI68;}5EaxR4m4UZGE z{px)P@6M_Avd)Xnc(^rorCQy|yAwHLH!&KmY#Z*UZiahd{4_SNlijG-XV8L4X zMR$9pn7eY_29 zCY{;khE=XQzhL*MqdE8y9NfPQu^#K6lj=TXHQ(x62YDJE_hVPpC0DDfQmN9?`{LX; zY3D^LGtXI{vNrhJ(>`EHk*V6~V{?fj5pw+;Vkk6kY!xk^ctbb|_dt|s zXLz_SluEhH&NM&YW)4Xz_H>x(D1YEb6JBYNrAEj+)!4;QUu3b+ zl!N3|N@hBakC{T}T=z{-k zPa)5AjN4|O#IUka?&2h0ELu|b=5&V5LI=c9wUX7H&IxnOT{oN8agj&PWjMe+LVIZ* zD#%%Kf@@(nCMR@R_*weINsAks%Ph#fDQn&n*1A9-4-CiTWZ(K~CA`z+{H(R0mZ6V@ zg2Ow5*)ZE`rA7qsYv*TGD_e~~UniuFrD}BwRMd$XcRy=gJiV{Z4T4!gyZ7^5VgkAB zNGyNIiH`%{Q0$kL#67Rz%s&F&FTpvmAG=kh+8I*GVz0#GtM zh}OQIs$s12Sr@p73TFlM=jLC_~+<5N3v&FTXQ!iyX^b^ zYLj`M?e^UiJj4504+jbeDI#uNFMtg>v}3ni5XYC!%ZCvBJ*q-OOfH+vl2!J!QYr*Z zqjg{5K|rVyb%92~w}y$cHT}M*r3@P&I}}-3A0?N71||}VsawHw_0YW5@_r5`dI%zi z$E-0Jok;`eKZk2Ae!~nU%>qGA%}Q^w_OA$ZBmXjUp+7vfRKZD zNy+H>%+qT3er&eyULQ5t(EKu~_KamN7CJ_mYLLWvp4v&Cd#F|Glyv$V)3*#01Lx3B z%RdHwO)V7(La;TDC`gFbv?_D9)e~C9WQ!o1#U_21A-vXxmz&&8ZpG;eQTEcjewgBU z`aH{eT074_8eqMz6DqQVkza#-_rQ_Q!AUI-rx!j*@C#8AnSz~2J~1yJc>rTaNHUSC zYIH7$CeaEvEF3=O1?Sz_CH(+;<#Hv}8O%n$PheVfI%Fn#@SR!EB*T*s4#H`89TFi= z+zF0rio>Q`fq(5piAs)!9-r5@RVjq01P#_nS>=}a)NFYgFpT&=;$YyE z5O*nr;<7tbHyue9;TF7<@K>-}1+8Vx=5sHq(5+Vx!`A--0v`~bGD3hMiZBa&qnYy~tmWbO*frciV z(yKFo)T(OS7;AiHl9%Cz9=A zrB$m)F1rvvq_fCtOlAlM|JXdi#KJ6t49j-CL%SG8zu+_eBc(m@X0%b#htXVVFzbj1 zGbZ3C_Cg3GB820bey~5!8YG@xA(PAQw+Y`NwtPRPJBM(3)$GR?EgMrl7hKC0;aOZ? zl9va9SX5!La-s3;=FOp)tk^LVIAj_nyULm0{!9>~aOwzxVfAjkOd6J=Lo@!aa9q(f z8+I3Zp6wT45}Ro$#b-xKCZ)mQ?eVZnpCwV9mbdgiWmz?}?i7YwFdc67F~Qu?8e%w^ z4O#rwJC`_nMaIs5Fp6t=!HeTrPcV)$$Ze`k@97M8IVwf82(c%Whv>aF-mR&v`=!p3 zr!9xaJ)n+}iQ#mLN;n0k&M27sU4R4T+}O!(J0J26{u@t|etu1$x>yr>1uhYs_D|vj*Y55X*m(E$&N%mxp?zD0pWwR5T1=;-XMDUEugD{5iL;aBQwjOp zzF~S?4j5Fn^DJci3*7p94{OwT7L($hx?`9wfiwr|BC91}x=DFB zo(uRfBO@K!^U;ZX=K?ij-I2Mo7Sedd&3MT2LJS=_0H$-Qj~QV8CDo^P#OZuG>_%<6 z*A-5*m(c@3ketx?^hJQ)6lFcc0DOP(9BeO8i6Iy?tCi>JIu&W1fHycgiyhRFx`Yse zp@ytB;5Hhc`y9s};uovQXtwQeIWa?p0iwNtFNy z;LM_)1rLr6Cg>d_i!RLc*6#^qRa`^cd)0=s-0XjmeSc5ZB5w2kW*V+ai0o)eD?jHv zo@F~%oAi43vT5@LaADB@DSp*SJ_}#VX*iFf$Bd(2$9DbdGH%-YK5Dkcq3GTWv&mMHhbL5ULQWTWT-|R|rm2HkTbYWU$&|p4CKM zH#!j20t2|I?WK zTYjr-OF;Y=3iUT;1M`&?6+Bv&DG~W_>(sR#tjFCca~j)^SaAGd=oVIdJiPJv z5oHnMcb(~6v^sFzf*Y^?f@L3ymM?l{!Bq(Ui2%R7A?cK?f*;HmMn1ZsrLa^14^Prg z61sx@l1Zmwd7|p+v}<;#go#VX?(_%hw@8m)`~oscs@!kAyoeO4nrpM)4zBZsoyuX6 z%b_0PlpowAenT4qeWMZ>AI3DQ_3x&%DUv+5fG7S}`&`4YBdlx)d`+hSTk}mJ?(r6# ztu2f{v5uToYaD$dqa9P2IsD5qjk*T*0rj974oA*k z9bNM+K(Ry+(6&-F&6b&E>U#}s>-35;#g(;YmMLfOqt$vSMn(b5tT1#(n!Q;h%S`E+la{9;sUl1E9n`g7Fm!=JJ(Q*o zPsElfnd{Xd*rwKs=_4tMuq6RUy`77MJ;5@Gg_|n-m8;8b zJ7^(T`x>)6_gngmmeX2IIG9d|{*!+aI+0z_4^$S8hd?2S{Kl~f#)D%@i=3*w>bIUl zY8vrr#$e{IAPC~Li1Bb}RzPMf(zCI+mHcuScYkXH<>yPv*%A$2|9ZLIV5DWMdJjP% zFj7P=To~lu!*eWvI5G{R5R(1=HlBXx*IHxyY%w=iczC?P_SewfP069xOiM z>FsdxbJ&aoJ?(xDn;h9mkg z#|R&Y+5PKqpxe16eUfN3Gy>mgG)}*Z3@LF<8g86(>mL0ogQY$NB3l|+9Xr`WEXd`D zjvMHcJXxzP)hNgYVo!{BaAz>1h}11~X1@@lNP{}b^;eLK^I$g;qF+$nO|~9CJ)mKr zoEuySu4%UThNP(o<;n*dpn+N9aSR7d;ZPr5{>(QY3v)XR650lVZbPAv(8qLM>vs)j z=3laJepzXaFc$z!bixmSCX&co*=)A0R|MEw<3b@nMhS{x?TIZT4!Sd7=(LbgkmHRt zG2L0ZT3SvJ>0F+Hn~r5eEaAg7l1<>2 z)!i0f_{1RYS2S?$to;>00M0l=E`K3He>WWi4M!q})?K3|Zy;VH1mbea15lX~fv{En z39zqi^R~)ED$2TRw>m85jZBgV9GW3pU7_7zqm7U%Ul+H*|KSNZ<}AeeS(6hK8Y}kfUMY)p`p5k-gK(45PTXSJsduzW?K| znx`#DtVtnXwlQa}G1Dn%050)J&E#eBdzMM-q6}{Hu|vFi5yLD`!<9o;CK`osBuf6T z=L6ec^I>Jxbpe6ZTu;A7d2KHgQ7vG&D+xW)sAUXzoBEcWDl9hu`rx@TOUV6x{YgkfKcaFgXIW)0qX zcc3?HxiDqz%9rJO{Tl4)uEB>UUJem3qZ$|&vZq_>Ac74fGu*3+=6aLHOf)WNH0n|} z1E?KLA2#aTX3%iw@3Po+b<#L(!;uX72-mLwo*qlSCE7rxO`Pk^X zC5ReSD#Z8o~QKrM&7e^R&aN!%yKP^+JeQajWmX7r;qH4wOwg z4>(I81w&m+j=mv7Qemj5H*uz-4SOG*wB-h7DhERyt1eZLEdZ*jEX;$i1z|%9qxEb3 z-}I!25MrCFPhgNXFmS|)r+HAJ0}iZ(5Wu!Wt~oJr^4hu$q_SN2dy{}Pj@=u&8&LY7 zoOiXp#4o<*!1_m6e|qa9x{7B=B zFd3fLK;Ps4c;VL|s22~3iOy;>kLkYO|1biI;p<#(BsG9qe6JS($cPahCwHOA84i=h zs~vB*&xu7EP#o+8Wc~+b2uQ~aKD7lT)2Ij)jb{>_G*vA6_8%?gm!I&kZ8aYs@caCg zJP=~di1U#m2!7YXoV(9S=aZe)?$SmkXNqT&AfQR+=Mvo%$+rIo7aai#1C06_7(H4< zD{-D*UjJyO0=ySL;PFYy6f5gaQXTVw=@V@)BVswNNXUdZ+-D@Z@5?HH@8mmdXbCZ% zIDe={7&)+!bERLOp=fwHELYb2Xt2Bru@9dDp^-+GeaRjh&|q2C`-5@}_xre0IAx@9 zMzEpjALhs=Soom4s+kfp_aqVBgXg?caNKGmvtjkz*K@kwwDAY~is5LA4Dju<@9@50 zM;C=0sB<_-g-9iQ)zE`vP%m2dwzXxYkJ0E99n+6_=qL}x<8c#bvt6M{W!`LXM}OKr zuq`ZR6b{WzxG7iQ>WK%xIe$1^Aug0mNp$7+lA3d@`oU^2F0cJ$wJ^G1(W%y`iRu2* z$?kN=kXmdyk*+c*hfmxpLw+(}s`MvIgHop>b-q$fi%f=)oHnCDfx0;8+Uo=0S~a2s zKLQlCf@ZNfSORsQ(}u{S?Y3PPU;pUs%Q3e*e%HP;=_#~KJQg`LtFG00hYB!;HJ$sM zFWxt&u`sn|rV?LyARb!~;4a!a?d`1rXXoVO8Zf^BMi)%qwA3HzHGSbkF>!fg{Y9v) zPMjzyuwIN~Pl^_dJkoAO95z#cW8Kw1C7ZD?45Ub-rEGj zbW*X64+KI|Oi7iv&pDo)IxY-?K*@iclg*}#1$Mo|_s{AQdKxanpLM;v(9 zcWf>lfSNDXBq6-r*lKU@52WbAP{?Jl13e8T#PNJ(rK<3wthn9zQZGNL6BkXi@2!q6 ztys#cO_W-;Y;)6+%iC3Gj$}4Z>mA;!LCe?-vDlsb_SfJwxdE-DiEOXSIG;Ao$C&QZ zFH5clH}<4Gh-fgV{wIZFMW9o z_-&DhhQjd_7^q6a>wUc|@$?udch~aP(meGBRN5_7X%ruQruC@zdfpoXBOQ9!e5EiB zSBdrI$1JZXQOz$4!DlsSsGTeUJm;|3*AY}}H>(P`4^o4;iW7xF_~7%%^H8!GVB&D7 zjRBrWOmG_x+g&dd3cu3i{ZacL{lqtvICZvMDo+yOnk)L^;!MA8|KewyLf=|Gl{E^6 zBs>&}=P7fkx92F+2bBH_evMsO^^9d8;lie3MT?3?{ZHyAnlOkGFHpVdAft@eoL_&x zd#qABr&MP=R#tSVK43C*pLO4G@^V``*Va*W;`h7dpz6h0C8+7lHZBhEuIh^o%mFN+ z76<0hIpNNU`(WSZpZNAP=bT)9AC$uw-vl3-OX6S`CyIjrrtJ~A-W`_Lu5)v#=J_YU z1lDZRZ1z)9=C~a*((a?n4K#I6@nh${0DEx)1L& z(yX7eXNv@bI43KZzXRf8K7|mHpcsKIDS09@rTfLhbLZfICT08o$2diNP-)65lI&>i z80(c5jfqA0PgHI)PSULY!V0F3K&`dZ#He$jK_T293Hu%}nFungY_3?Yd|_}~Sd8Q& z>S_1MR5u}i>=MlT>*zu!90HlBy1jO9dAJQY>T;Y zqld+GNf+HJm(M9grfP4Q^3#p>$y&WYFBq5h>FZNaN@*xyvw6dNy}>gs^qH4Ti_JDl z>TadFVAs*wf=39fm)Nee;b99EmWNc+0%9_f@jpbZq8~^mWM3GCs!~h+1OAodrQnAv z;4B}#QSZpg{osuqGFm9-Y02?=7`!%rpa$qSwdQ#jq5$o@>w;!@1o!9q0!0EyvR62uc-CeUe%l>uZApIMV zM~?UJxtrZc08JXA`J)~IxVQ8h+7XcaXItrS32QWwKJeJa;d}<$Ow5Rzl2M2=CD90X zl|@A6^qMcXnG!u%yY=Qxh*qHvW<1~u-n~fqg*v%1@$Q6OT&AAHXvToOy@EyZb7ih7 zE^sX?znOM-{5<^Zj9Z``%66QJLU#2>Ij%uUQ7sM37~)n|mw?Rrs_v{@s;MECV5w4B zZG4wZUIu-FvMce$UmgYKJ`vq_wXC@fB3?8uBRsV*X{bFukXVlbuCfR9^1}DM!Ww62 zCYym3VeZR2n{tWmDglxSPuP`N&sB7chl$eW5Cji=Smp4@MkXaEe`zFFpU zbvhNIQ_%56Y9)c*oUS&VmP{hRV^9!vdORe;VUEEklDa;$$zkG9G)|!V2v}u5MGEaE z#PBUSkW`vP%7AUaE~Bo^o8;+R1{Z478c{=xikNZ_)QZ4=8Uugeu~L2ECTexqV>)AL z#^bP)C-8lwR52rp#5`eXP!BUWhI!l>h&YC5NmhvD_+S;C;; z0HVkwgc*du6`&Qi;Ql^)t;5QvVZMf>Z?_#tUl*8<4PLLt1oComNlqFp+A51(?~c)a zAVk@crDht7Tj)ISuc(a6`NiHFi@MnEx-PC`(1lu$7I$ROfI3f|BUCN8~=e%>i}DU-Blby-adx4=P1eGwuo zYgR0C2slV;9_=Vi}ff$8B^Vb!13|%eJAx8 z=8#D$IhhZj(K7Y%Av$W?H_kQ08hLFz+KNLF--isB?f7AC7ImZgW)gyq+d`xj1W9E(@mx*1afMPzE=7Q2*o@q40pcH#k46l}y`__j#kW@WB@R zP$X8`m!56W@TN4|ZxyUjo|_FGgVY)Rg3V+SD=Kp$0bEQ;fP6or7&pa@DLoT0KF_tf;k?w?> zV^3!Dk~^A=C&=J7iFdua7fM8krpxB|@b2w~DKYKFY`4WAkHcFnRHXrFiWQn**Lmkx zpzy@#WdE#A@7&;u{3B^hRqCjYbYg6kuJR8zU1)Zd!OHtRqLq0cck0r#p9%Lyt|Ih! z&D}wwGo+E!*WYXO^BNFWEr0c1m{3`UU>jATAWkY%kw3823vXj#RT49i&N8g(RqbUR zV)Eq#R6Pwuq#z76?sxs@>=Iv0gBp)INb?-bSTj3MXuGqN*m!kUjv%aeIAiTqSZdqf z==F91vi*&8sm8|$$SW=EZ~CSe8KnCJ)h6e(TE%skl2V0KnByL2?5URCij@*5gOa&- zSE`GjnptL?w?E!33%++bX;>^&RxNoQ=AxEi^gGk4?wDk$MSHrB1Q^#jOI4Jb1!)Kq zAcUvTP+>>)AwJxe?J;@~B*Xt7_P#o*%5HmCN(m8ALO?5fg8ba&S#1e8Wn zy1P>vMCtC{Al=<;xG#!+=iGC~9rxb9e`7cti+At6-W7ArHP@Wad{)g#1IYiDMSRaj z{Zz}tYQoHUd)+4NjJbG?HS%Wo2Q{;}ltQw6R*-~JBw;b%?obuO)=A>ZTG8B;`%Vp8 z_1BW41WK`--dV64WU2Oz!?2{m&{FZtsE>Z%rUNU-QG=<@ZsQ9^%jI_#ZA-#ZpBwAX zHauMRTV6KuG3rawgKti2+7Fb7tS(ZR27C)nG%b>uu#P!=j21F&Mt~!#U#4M_scX{y z(*(Ob9>t~ za&mHv9m{0^{Devcn7)_zlB#91idFF-<|F_OV%k9)K#7J)n!si`G?XHdy`+&VDv)&O z&7F2!>Jv)8`Qh@LJrS&sNLD^pu&#cSV;!`MC`mgLHH$xwk&OAqc|;6ufC@w{9%HU5 z8DJN{SpTvE)fLqiZ)BRTCLs^kP|>_Sieowrp``ouHRs5MWGvk`v$@2R(vvdC3Uzd} zztkpW#$8x3r`qP|;U=Y9j$Fe6=R!PW{p;$!7n)xSZic%+&aeU=0Mi=2LUJe*Cr==c z!X&7#9=nuG;FbtSbLZxl^Y4EZrHVL`XT~K39BnrW1^+!|3fcUe|is3>gf(%wn5A(4IC^+FBSEoJYLe$4`C6~@UL<8OhCw(*(QWpS9=Y3S_k1QYiaq`KBi^# zT`wE+tX@;-aV|Qm{y?ls+C3TwXBfU};*wm;{}?3l)3+iqz6&03hcr%tnl!g1eLg5uo zlPJ57rtzT9)>{b4l0Ls&j2SaczB>V;D2Lb$qQS(I@tMpgti?|%*_W50t zWi-D5NxcX?XfnT8G>4s`MOc4zf^5BAbB%l1n0Q~NO(rYh}3iBH5Rz*3#=>{~&sNPM($YYjP z=%&(-2aW_qPe;}94;_=31w8>>>P%`qpa~@naw$pst64K2d+RHk>R>*lUjhz$rWtTS z{haUwbQDjwQK|` z;r*Eg=hu%yYE=B9I5e8@4bjz7cEjWw_T9wda;E_;RAk!cGy)r+N21+9k8^FM12t zF_6Mlq5zm+gJOR2-I~@V=RN)Hb1QMH9}a)s&Ih>8j1*ffe~G(vrlyX00Tw>omv#X8w)WRKQ3OrRRulG0>m8nnu62@%pb8tC(U{@-l2ZO3UxkV z=_W(Oe6N%`>8sBgI0~SG=cLy;H^q(?i%_TD+%a7s2Dik=ndQ(ujDZ#W%dAA|*D^*Z zC@+shOh<$K=EwI?pI4^-j}0p)&+S_zu3?teUUfI5>W#K>AP0vsVd5(#CT)EPZDpRP zEYxJFx%-JX(PJ*x$IW;johCPP}1_fj}nS%yl*Pg=O9co~7WM3}e680tZPayb}8JB0mT+yVVCGnBK5}a)fSpZQY(wcE8?AAko0?c}JF1?{Tyt zFP9vvMO=iSc#QYRcNJ=Qw|-AOyazO>Z#J7)4U?n=TWh~BqY+`mO)E^Y6Mq=%+)ywI zMuJw=(Zw|ge)2K%mZpC0LsmQ$y2E8L)Zod++^@YzI;5$qR5+IR*18xGLa^>$V?NW% z6E^|nAMx6Qr_i?qqP40&wE)awy|YVdzqK05E#ZC@OcvTVAhTI2C;7aGPAlDpi-Nw; zd?5_>j5X2zn-Nm>O99y8u(0yCFuqq$6$%z|F+FvxQPcRR-d|UwO7CNRrgc+VVoEQ! zf3t&T^JWDR`*mDALJyLPVQK|hK!Y_qGQ!I!#E)=7kFuFsU8qQtE_|Kqb0Tg}i@3&` z5r1aPwLV2!Q8&2`BNcWpJ0X`kNA#}T85GBgDqTMmc`HW*K00wEfQq%vI2@Amw*6Iv zcLjcN^sm)tct7P+Bbm1cz?B)0*ZoGYoJ%U82L!O4Zz^8FaaI9g@i7nkc;T?;B?WZr zFz#PNrnPO3_|g_7pBugDB@c*(14I%P_Sw-V!^{^q0KCmq=L2pM75mu-Ecit4Z#b~u zImGiH&GO26(txJADHx_5O^b{eLUgw~kv6|#X6&OIeL418g3Fk*#-7Y{5d=NIKAzhP zttd;#*JnUR46Z9tbF5m5jk{jtgXve;sq%V@#49<(D>+M-wka@~SmWr_{Fyi3VAO=923Lx_XrtV3sKKPJU!KQPL)`EC+T zko@i3?Qzm;Nt2eRG3aL|6uTSeoO^`x9z+HmArbB|gkiYE&bk3v25LQr7QD#=C) z7Vt}Q$L&_QABd+Me9j-_*Yc)dMxRe{d;Fdo@L=$vJ0kAU0Z~1~gt$vmMWwTbxwFO< zKjy}})-NVEH=m`tHGDYrD3hYlDh+*9&wCToGvjc~hU!n^&5yKz`(Wku@63R6rtl@1 zjj8|I+-quVK}{?87}`+Hx?|S@oUPv=2y}@D$XY6&-ovxyWwhvv1GDT7Yn))5;r&F? zg$ik=o}7wX7-89--49E);`}PC*2A6%jH;OXy;8Q8d`6ni!zql&02jsoh zkheb`S|L6~B!XREh*Qnntf7j_(se_7EtM?<+xikc|CBIs*za{TbH*qlDld?ea-TP~ zApO8ovk)y=qJ6{hjj;dR(E=}0xR(>`{sltL@*9BwKjGZ$*X50F%}=-`V7ml}Yp308 zN68pm07}Q_ron$f9ShO?h0j)wvlR?p7~yw(z`V<{B74x$7)#fFnS)ZL8?nRVQbKD- z0^f6WYgfhe1Y@kH6%;D65w`{f@- zi~~37y= z+6$nT#CXFf{9j%R7*CIYnm@FGba4N*#=m9Q*FdxWr^EN-LHs{6{BIKW|1Q$Ai27aO znumteqUxdfWrEnXGzTKZ9kQ9r11&-(d}Q?AEi{PUDcy3&_(iRCOh6* z>y5+)j!RzEW-BpqoS;7`d;BUGsae7*noqF(@9Qfjg7rhjsq>_Ygk_m>(a+Ei?k0`E zkSpIif5PE#qkc~l zBWZse4a5q?KqsQR-%i)3G{-GeY41qLsJEUOg>|A;Xgzb6QDYJfP2yeW?D&gr_f~&1wf>&;CtJ6(#is zSVS)j2E(QqZUM><`J%TLccg{#8S(^Q7(8OKFAodo#r+@S+Slu}!kV_iNQ9oKEB7S& zjbuUK&c532%`dUufi4~sFsNYDC6M4(|L6qp5X+JDyo@gII%S$YN|cB{a+%#QQz|zV zQtwuBMCzaT8yvQ@LG=&1FYuf;HW3b2>q~0C>9j!@ALBAmXyF1Jo8Sif@UyR78I=tb z0|1RdmPbc*@tYLX^^&!w+Y5ZvGRIE^+|^Ad_~Vf{+MWV!Y$oF>ZM>%?BKEgGS5Op< zzlz%ap(QN_vT56^B%MF}l^=fCkObDx#~UL(K*Ver>cks}a|Mu_iblH9W`ZKCZ4Z75 zfnM_?JwORnAeXs|hmgw5b01zy+{TcnxM(aH0-2XUGHRynm^ffwvrbjGl>k|z0@Du0 zBt1&>m=v&~ggmct=rqFb5&;2bFxE9D(^-7W&f@OKJ17m6YPH22oocy$#LV9MYWzHm zpMSGKu6&hk1LN#)b(QRMVF10hnJ)UvOLVwsErqC(gOk3A&)yp#qWR4S}-w`%W6;W z7)d!(FvsyJF7B6QMrIaltZ<_guy{0w!fUC>=`lQGjKzLv??^}HI@nnf+Ttj@7rEiKb{IflL1t}fTh zUa?^fSzR5R4PrPdc%ECPTH;X4^*RLK)`r1<7xjX0iyFZ6Cp)pzTUC!6 z3_>6+_@etW3LuwSw|vOd_Z>OgkbUL4_Q=%62H*~zScD4W!hRCPxvg65{lZlIX7a21 zM)_`AiHLlU&a=>u3ajsgh1W5kNq+gn*zx9vKBNiC8t1t;S20o5Y}P?H=P&f=AEkK! z|MT{eoR^^eZyZ{*4gR%MT%i)?e&iyr-*TJGyd6i!2ceGPte$Aa=73#0i~_~l^yQuu z`n%m2eUzQ6}_V|zIe`0_7O;VH3~$x9EnlgJd$TY-htqxq|=?(sf#AiR|R znahc8Nn$DRczs$T8e7|)kzcLS{Izu=hgBC1kTM&jSRe<=@xNv@Tdfy}5QlQUbJzy%)2|_`5pCEt;`>)xIS9 zM$0`~Gu?4<#|mAfYe}8xFo0OD%fpE;l{$mC2Ov(A{91p^AOpmj2$IQsLR5AJ}03B85Z|&*hDYN0^NIH^HLF=&`wM ze!_2htS?7RTKWEw!3#v|OR>W;L+O9o^JlXIo(PcgON0=A)0hAIDW#0g${|R;RJJvW z9#P`&SJ7=@kMl5WzxV#x)c)>ncLichplzg>ieFy;Y26=%ai=!;JMZk=|HE%{M-dS~ z*`uC+k0eCN91;1cm`tLR}0zm5iRv_Bs-dgzAw*t7xM8NufDboA+-`e`818)TY zvUpipgZRH)|JTAxKzR#yVlct|n^|?=2mrnTD4GB7?xw$upcwf_CCtFTdi9Fb?QKm{VHV*n#)kt318; zV_YUtMa8Hl4Ndk)R_#*xy)nJ6-|I#mALcRXJ7FPl`)uP{fHpUg&xyAR^ipg^Dwc(lv#<-RMV zq7Rz8Obo;v0Q6b|sJ9+L`4lwwav=uaX@mNYSr&*W!<-XX7HdVrV zYtOqR2(K=hJ8N`;fQxEC-BU|e-#U{ zH-N3rmNdDWKHx@$@)y0o=-q&}$j1XBPU(RV$MSO&mGAy zz|TEc`h`(QLSH{~z`2Wn7(kqn0icU=i>=e;{`iZC-wTI-5@X>wvb3I*2K{e!Z;&42 zU*rG;k!n=2MRZ2}i6?Q#JtYkk9=o*?14-OA^35FpS(@EiZ!9m*-obP7tS63jRgH!` zMyIn;RIo?)z4k7B2`HM?=c|2Jxfcp%=!gkyF+9n=J>%sIjNX&yvwznjz=CgL_qr<` z>*%L}^{wxjcyIg4;Z7LbPXZ!2p0M2ilh9k!uEhuNXDU=Zk-u@I|@NHBjC_p|-!qokIOjPua_w)-3 zI6el(K!8JzmHJZ=NfcEx>W$bPTRLFV8$-3u2kcl?C!C6BYe^+yXv#gVo=h;9-%*%n z_s3J*UO;ZU4&#Iv-H*|Eto{v>ZGbr6IYP-|HW4nj6j1*>gHGoYCtZ;0Ix=z!iGrbY zxP!XV7T-_%t)VdoLVUK_a81vfC6Pw|T(qCJjK-;iP7iwlqxX&nQH%ORn}gfMSnP7q znbw4vH{%~Vmlk|L^)?JGa{N({-l-UqWga<>rbz^2GnRW)8zVWfm#AB{hh2$e6;Twb z8mD_fn)#J(VKd811H9qsKLYX00;K@jOz}Ga5BKpp)OA+4H>lmK0^P2GO0AJK2qFU`i z3~j(1HV0ssfw2_X2Bk@t-^XnR2O*h`b85B=EdpOy^r}~FAW#nki5kuRsu-}f8a7T=~0P)TO43Rr)DpnV6@%3{dP*Pph%C#R#xpAv5hsSF0@y3h< zr_)a61Y?EG$IIL8xkrhZLem`w0g1kdPowpTA9dk&aU}dm?6-GaOFG*`7_iUg;<4*j zeaDX;mFP<>R!-U#_>PQjf4n&+IUy;@z{%QZYM+|7o@qo=a}K&QkGuH=nG_zK{{aj< zU?|8gz2htt1j@hzZ~JHG>(DR{Rb+XJJn9cry&a(hOntKHQzB7{X&}6V3!IySG!Y_e zmtHzt=h~Cqamd4y#_wuu^)08{Q)Z2q$JWs+hP>MQTLCBdg<`;M6IP{?VOiqQ%PkB&?mJzVJ3~2GH%22K} zO&jY@D+=i9$BAaz!g$w@lD0YWqd(R)R$5JU5ee>DLa(EW^7jLB2A%Op(!+8XbSFg% zJfuD9&j!FKkX|jf3smqd)b&aE>l1ZnJA~Nf;^S+Hf%C0{Zx!?xdS8#fwT@4&B(OO( zt@J~iZG=omxd*H&9CtXkAX{(iom&h}&zqBvmm-esC*E$PiQJmx)MJxHl1o)Rtcss> zP@Xr&soXVB@i?qS8=t+ZdAIlI)}a}VmsYKG65tlLoX+XL-5^gnfI-!M-h4N><}|5i z&Qk^21RZIRm&tvnVq2kLNx50jk2=?T#@9fJ(Oyvz4<}bcmji)hVOyRKGfuPp;VG1 z6;z!}_TR~~l`vg3D_nWDM(KuLO}^jZta5+wzQ6F+;StjK#z7kRrst91x7dw~o??)A zxZ~cu-j;^!+dRug&Rx7^)+*bJm00aS2>o9gU&U+N2sZIzM

*^=qdZzsWI)2zmEHL;ZTcs>L zn}Kmu>Yb+1tH@%BW#+FF8aBRGWFw76%2&F*Yn~0kGkXmzl_p;pv!F-h$*!Xz83%^z zB5N6n-I|ZcR=DifWk>Zw#~nJYzS9G1mlz+3LI?cghw>Fu;A3-m7}O#~!WA+h9O7Jz zIlnAGw(bNB{gL;}lqV-yr}&c-c+7=X1xm$-MQp3RH|^rFymOl@xn_Q|?8pK*;Vux; zC#R|zN+J3pBfTL*shGh;82apq;pp5_hq>J@WBH)%@tMf#$D*Gp{csADJohG;x%tYl z-a1!vI+$I>wu0I9ctk9&N5c42C!X+l)Zm4jl$lLfZjEO7HlEzvs;R)wap_~x=Hbg! zVp_PIGlrn1Phe3giB{Lv<-B`B!D+T?4$*u!Co?)nW;O5b*r8c7?ZBqBR0BD%)74o? zuHUPG%vr0Kr)(8BQAWwE;7}{kOnFi(<;R)hqTDUDZ3=YSQUQLFYHBH25>|TKd+}(o zLLdB!oX-!j9&uM-v5C~|B&J9h zp&gui|9%iK2%v2iiBD=g<}-DD$x(+J=pnCqA00YjgzAqsn9}<2J=)W16s? z8Sp8s03E)&*ymXpUZ~3F35%JUQ>G~H%kOQcb3e&WJ0Q1HmbC}0DI&CNQ9hP$@hI$P zC!W3KqgAUaHPUvUzC7L9IiihAVJF)GCyPyE8MpXBJ-m91e&PUA9}Wlq3J%}2D(xK# z>OlBrNkiT$G2ETbxs!`9a=6-g2A`n{=LL~T3;rk}v3Emsf+$YR)`Pk=A?*>N!-lyduOvq#jJ1vVY zn;y#VtrPtm6*dLTI%`#A!}RUZoQA7A)!3)vr!SI6Z5J<$v?}2qJyGlE$5E*v)T9Ay zx@;vvU~D+K&GHA<`k>>x4LfMO5Rx#Sb?VVp;f`77=agh2(G00)LY}VjQbPH8AeM)n zT5NDZ57*oeYL!2m<85)AZn1RAV5kErV35FfwEWB}>!4WewH{DU)KkmpaoPm703UKO zN9WJfSV_#h%^d!)l`y&NW1G(m?24@VC2g!nbOfa;3Y0j_CJK*&v|M*w zh+67)^YNu%aPFqii+>vy_hjnDRYcYvwmqMiwv6~6i-#TASU9RTocUCU`?K1IcPLNo zH>SJxERQ=G4kj%=Ga4jf8&yqzbSxA@wxgGiFCybksl!Ep3E5xa7WvgRViuUXl+4%P zUGJrWe*qK4m+ju13~G7#d5dbpD;>##o*xz_l5p4yzZ1#3Am+CT>g4J(- zSCxSBCR>=bxh0YC_rk<|8Ln`-WB7sW>J6V##}`Xvh)@u=x@(xa5B+bswCy%xqGhq| zCg^i{^?L(+0IT{=XQW*!o9I`+AHe1z0d<?g>-{{Gkh zz}^D#&gckL+nv3xc?F_iyw@2pu$PkKMt0Jo*Lx)78V3CszpG_O zc_GGb>TlxWv}$3om=3dl1d_ zy;$n+`x5U3i+4#gZe%Mh1`iBf^eXr7UUok42A1qJ!&!-S#J3oFFz-XOz6W-W-oNx? z1p+fgF0b9mRKb9b2liTKRDpBraEI+Ml3nqitrZM&OGGn~!X`668^eVIN&Bvy$n$HY zmK9<8Y|{khH*BAJ79IIFFL2y*rqA&FK$?tAI@a`o6ru#?(B^3TY z>eEFBtOPi0)%)rG=cmL+i0Gr8+!H<|0{=YxUGjbok7)YcOGxQIn$XD&3(vwhP_-cX zN7DbkkoX~t58l6ZiP#QVvDh^xn!D)TDmQ!RFHt161S@X_FqPdJaSk~y780qbh8fc%*|@> zt@r+qjZ1Ex-J(u2JQpt$j@#pgUEIdz&Lb#o$%BhRuo)92FAJT3RtsUr(9_)nEa;2?9MZ0#Z zUhI}B(QpH~UgDw=#dz&tttv-c#E#PkX+)W_)8-s85 zGr*-XNph!;&F1sVl2mIydmPToCNMZS))9T6YC=H4wvN8pzJPpbbOV>2&0n)GJH}L8 zd;Y*vuex??Hec@`dAsG=^NfR#%W=B~(?+~zjO~<6Jf_sDS@k(*=r=zU)gj$aFja%! zB=<=VD_FcXBC?oA8_A%jdiqr0VE2ukcI%f~O3vAZ06P%6~ zMnLY}X$e@RX6SWjf-X|Pv+J#W53?z>Ew<@OF{YjU#Wqim^Ad3*rPA>o^{?3EKg=ik z=cPti8ACCo|R8 z#v`u>jt%@uN@OX^$l8GFC*74fAO}a%3R^J}C3S=C!7q$LG|vd49khz<3OA=f+WPTIM#f zyYvugyjahSt4L7{lB9PUTw9#TlGcaM#s{&=*m@NPB#%rUW>E`njhSY<^t@KeHaBU8 z4*%#8+&a#*2vC|g0kHpRD^;7LrJSgy7YoY9iwER+7nJIQ^5n*OX}ff5qq&x~ni8=E zRt>WV-byn-zMQ*68tbl_5@$vFio;o_44-Aq)X3U&brGMxW5V=>AEQ5y$#i)caB)Gu zKPv7e%hONuv-d4I19b|N_8l(tAG4K_Pf+gb|$Yq6khp=L?;#Su zsqr}%bq`+kN05kIyM0$Y^Q-_vZaNi84R8*td7g!Y6+-+P&4+3rWEWf` zS2K%(Nc|>tkZgcn;f9CYd0=LFJ{FMK%@Votq*B16LBW3(Y3?k?L3VHB_FI&JMntG*SY)7XmTEtFAlg^>e#!MF;gmr;xS}kB{8FdIE9mO{2!!I} zz3XD5{62x%F+b$_Wmd`PadU1MmJJqZ5s|f=74>5F%OuA4@gWzwg;>;TqprsrKdEm3 z&3Ni^Sf}%miXQpAP_1kl!AUsaGz9`Wj>3E{`@^LTEx~*;heQhNE5KoM(Oo`(eo9SI z<2V=33f_ISmuWUMx1)SD&f8>tb#61`sO^nvh$&~Fgr!&Y@%%cH-aMpWu-XPDp$<5V zlg=c2)L)tv?>O`#(?#(v9S_crmB0Rq=Hs{dHUk?zf|8|~7%YV2ERVpDOImDCtIGP7 ze~i93D`+&5-E3T$Zw! zW{&;#+88@|RMllI$O1@%ELQKkEzX0b8uBUgB`jI(!V2g*p zf4W&sN+{cS;;2$S=OXok53XyooOOOC*Qr44>CqlWgaO4uE`vR@bmwtb#kE_`4QnMg zHx`v5g*~BK&K2i1j|})(tDDxjBew`~J@TMDNxhb8Kgut=#an?m)v9Q#E-;?zTYkeP z-{oQvwY_L-YJ@hJV?LBn%-ki-D(xiYB9;0R6-$0+9^?K6Imu3o_c9#+u5A%Ul;V(F zkN-aN;ud9-ZBg+XQx(8N#H`(&O;~YwsrF&Im3iLhUK~)lls-N&R8yeqW&# zRJTe$cenkGoCFd=^*06PSRD_wv)J(P1$CJ?{KIzwfphi3U+rYfVX&gGZrEL}6;ot- z>}9BkRXVC9<$`iJPt2z3SokIjmx{zIwTk#KMHN4*Mw=Q<%}yP8c+x5)6U~LyZBtNW zDYhY4kIIV+#E=C&?MM*oX^e}3DmzZthnf{-dW8>jPl#uvn}I+T{Vq*@vD(hp%9UGF zr8XXC^EQ(;)yQ?uAHWdyS;38?#V7n zlfr^Wlrd#3_K0=tJ9-xCWF&WV&}l!tLFQrS3cB(2@;}U#$5HIn#WfS?I~(hJ#ciQ- z+iZ2y7W-&`&0_Wi_leZ40}VdgwH1C+hb7K)xq{E~Su#g1ZDAmk>N@_wI_X@l0CR^v zprY{t2eCmck&@_F0S}~1WCs~L)SWYzbIL2VHEaBeKf9-Au3l`$W8h*=uI?%mIH~p1 zXB~o6HERqTK{i-sRp>*FTq9N7xf&;u1=~I-rg<6Tyr$6+<7RlgEIZ@0pUx6sxg$DC z^yG1J_cQsm2IDE*0k<8nJ#S*C?<#JdreEo;l<#@eltp#VmvfZ4NtC89iyZ&q0?_hi zgVN?>MeR-*6r(6QBkv(+(ou^qo@Hv4u+Ps?mMZcK-n2qO4l`G@Bpq%$_FNE5Po5>8 z?ut#@zH-+*w{zf_+P!)01N4%T->n6jl(M+X<$w_+8L_AB*|_!aS@Nq}!&fy(-~kj! z=o+HFGH`yGI_b96`h9*$goOdoy`ol)2q=RBcr+mutb|n9!eT0;udhc7l#33gPIju~ zYvz+I;WDf(YUm?GIVkB!&RMw4-#9O#v1CQ%v7He77kX|9xb0HHOaux>eW)E9N>VTj%~NSUZ&sEMy~8b6VhQotW7#}<3^F1ysBST`H906$Lb9N z?FXMjpAM$(n1eZiZ`#zISa9AuMSN{qu95 zij+phMS!$KRPyKV*l5pmO|Q^)k*XdbwF?h1L{V#xJ0x*Ee<6L?QM#44G=AVu4tTNY zgpjv^lk1gyskC>$Egk#QYlAHw!99Y6HlNQCGw?HqANi#*bRIJ?v18TUH~6B zxR>4<4k>@Hp6bHm;i-M!YQYCJtM!E5_r>bMFDH66AfpAgz=B^Ntl0zQE1h}_l1zxW z6?qg$c(u0=vbA2m2KjQD68Co+9^>`s4rX~ky#h9U%I!KVwe`=-)@QU(r*n2$;}0uJ zs3nS?VNS}0zgtTSOrhE(lcC-?a@=(EE*Z73nZu4!hO;w`c^*BKB_hJ-l^@gbIfh52@r zQ;OVIGy9q2B!?pp%8^%%-4SYqB=Rmy5(10WKLxkJVW~CsIFL|jrN@;phx+(Js+qzw2Jjy`5m0FutdG4#Osia zbT_#o>7PljGI!*}K??Qg0WBHivV^NOFx%0-X6_eurY;dGu8(S88hnsJrA_&3(kFP*7P98? zF$;%@^Y8?7V(SC!WrQRmCKUGAknK0MMtGFxQ7H;69WJlt_XTxznAJEId&cXVxg5xW zEZfV3p_U)5%ma%T@~(91AtezQXUgIoEw=DKY0+<;2Aor1R{jJhC~*dBs>I1JW7yb5 z9p0aJ_pJ}*YuYIbZ0^84<14&s?L%gy?)*t~60*BLAY9As;jSa7AmJZolx7vX)*o*a z5{CBiVyIs5vpG=%VyINf*sHg2AFFFq{Y>~}JlgWQ>)LNVO6Dt?c*%oe=$drBw2Qid zn@-@xTH&RZa?7Je`xYc741^%XR2y+z^?|7m7Zv4?E{C+;3Ghbk*U3p{bpmnX45ek| zSJdJB^WSF0yb|}`U~nqz8a{}|%iu5KXV&wl3Nz(Nqr+Wfl7gRkNGMR-Y76Sl$RciT zPnCU>tJ%TYB%h_@(=G@^YsJgQBx#H096)FAQ~7QJ&IpPXRlRn;9OHb=$wUjy8I9GE-nt3(<_H%4djae)S27`-gk#w{B^l#Lj<>|V<~9vqV@$Y%Q0uwkwS9R z=TP-4xi=<#2M^i}ooEeGi(yRD(lJ#Yo}y^<#GfWjPqs*0PLGN)xf`%$d623SVlJRa zxz2r4CYx$+{Ae#mIr+3!)B0I=Sr9&j0=P!J4#Ki3ByN(APg5O@@_}+P3xJ`#YYpbE zV`C9DhaPRI2;vZrKr>qzRxn5#Whf|eiQ{0?@c8u8@$HqbKtYell1kb!<(x1r3eFHi z&eANf9*Aye9bY+llOl}!KxGn`d&0DzZr_?Y4b(WD)%&<0lddNZNs2AbIrucV1~(+T zf}gXP2Uj_2I{KU;Ha;>Q99-ae<$h&6z!E9Na7LJ+q$_R;#h6#4nr3bF$acU(R4(ep zcCV)}b!su5sd(&N_&WT~Q7HyNs+h*LZn$@ng&QCo)0U`-eo!`K+SJ#bs!?-VSKrt- zoR6*DP*Se%j-(<$=3lmf5FfWk5xuCeQztlH2$ht+rZBUQo-Yx~rYo(En~t>Jk0}AXGaMY>XoK>tBxcrh^9O|_KU#c zXBDvG1RB&K6I#wQw-(e7Y|_QuP0tGJdX3Q+M|bm#cPZzBIA2E>6u)h~8Os-Dilku2 zv1dT(4qov@twsWm+N|$!UGtE-dzcQK+@!g&ksUp=D6x=nY$m6QbwJd|2K@2*G1RR# z_ik!91Vu3pEErU|j&dtdlZ+yztl5WWps$rT@1ik4j>9CWX-9!h{DN2Pa{Pi@GWuyJ z;wGrZ6jsc94g~VkmG6VVl3OLC#nIgW*h(g5-yh_tVfR4uuOq}dY|_udJFg>3sw`+$7iVr` zp#HIKcJPb^59VYTx1txn{^Kr{$CSnS@Lo{(b%G|@4^$Oz{x~l~%RIR)4M>&6UUH-5 zcF?z`psvh=5&6d-K3McCz@vA*gSF@DdWYjHc<-t8E+k3yMrGL*d1EIw&S4vh>3xoD z+@4S|OPtpeQ}}T&bU4i(m0xYDZ#i8cfTho-(j<4}JsT$A?zX`Myul(Gf%l=Czs}nS z-sg9*gllt3+<%_kmwr+J4%X1Gm?8J>zJJ`Y2S6wRgHK)jKWk%0Mj!o1LML>u{{HzT z|Mi_~c_8NVfBK_C?>xgFC5HZ~|GtFI2Ut=E6CXSm&i}Qr;gZOfTbsCL-l7-Cmd9gtX>;p2c5D4wx6-xz$*?Xc5O8;x&%Hkx z2$V=i@bAvn?o5{Hy-Y*ej5 zU5@bw$&1$_R8ekE1}(2__Jn&PXXboe4yXJH3K+N2Y&CwZl?|Hk+S{CnesT$$KWm4` zJ6$Mfx_m6?GqmA!yyswQQ7p>!APZ+Yuu5W^Z-5(P`Vw%fs1|F~7TSLqPBYkml9HH< z$8yZ21oV^F=e+epzT9HBnVuZX+nuhNdrf!S>v>~P(8zS8ecIEindZR#yIb?HE){1R zHjekN4Ud2YE`P7ii_8gdycmnS1#t_a$P7jL6f-6Qxe3%tg~scM$sU(3SJxGbfWNza zu-uvFQ%+xMQ>@AlaV!fU$a%ndEvaBhH*qO4Q6?JkEluz8WWn;v_CUSbyoasYWde^Y zu9UAbl>FBVr__P?>(tey?fv`pb6a-j+_tl~ERmp(p~rR*#IvGiE~btd zP4J~h>GW!EiseykFH+<9v!V%4t1Bq&c)0^g{h1Z`U~5KmkhRhsKxSBMXVULPbPzqZ zaGRKOn(TE^fR-791^vPLi?qbA`<@#~G;14|G#vQQi*zvQ?P8iad-}@a4kf>+YV@;0N4QYbFBr6x3J{#d1#)iFhS+Z5sWB5A0dL z{#n4YHnHA*%bdR9un+_UD=g;f#u5~Z)PBt0EQHAF2zHd%PSi&)s?pSbrgeAmbk%8xj_&SQ>J5*8tZk>z_sIL^mNs@ z!|08HR&R7IfNP@$!h;Sw^FNZ^FB!D^16Qc8moRFqG&_V)aIPk#TOdTISb+6We>Ekd z#$sYN$9Y<=W;9Xb(Y#v$(0W~s|1b95GAzpW{Th8h0R<635T#W*rMm>Aq)TEz>F$o9 z1f)wqx|O2p-jzvb9ct&n=oj-cvyq0?|vonv+ayE!ziRPA}-u27lKpA9jK6 zT9W#MN+#BdTK~#qL@VDzL)ynX>M3_11my zc2U`mMYf!&_bc+n%|LnGc~28MsrZ9Ji9(#_bab|vT3Z}8+U^6{hqO(LR_1NNQJ=j~ z;Al#>#h+LdHLffII=!@8n~1vk6A=HECkjO~rT8`9lbcj~G#Ikk8*6kSfFbiuty%~> z?{IatbGmNxSKW9S2LQbH{IMbOTRK=vbkx+Ri?%a4fe6=wbg+XL{M)OwlHT%zXfM62 zSMa?G=>@^A()lOX!cVl@91tJ|dF^Gv_u@}@Qd#ijoz_!aZ~&+KK~0?vV)`f>dWTof zr^+CI;=Wvpip%%aY@Wr`?kl=hq^rKHctDnX`}@6kz0wa7;IqCbRS=WK+JXw|5vF37 z08CsBoG75}^O<2Zg|H=wn?!Mql`uhuzQETyw7XRs%h_)ws`JE|$IoBR&r$N(e=!@b zJCS2e{L$Kf!Zusp;&pspDCo9b9R_WBKl1e!)}@hLtGa`h6{dz?f+6HKF=N`K4-H?z zTTv`L+HEb6;x{#SoKPEG-^0>s)W?xf>+1f{9;D%^jMrgz$YwLi%J~=N+>RBb913g4 z=i|us#eM>l=2_?4^U2wU18R2Fp;ruVb`0tbfEKm58Pf&0p@GqiKOSb}ES(*}4ce+s zx1~tFs3U^W;SWI@Z;f1tv6TRqNDkaXxf>OaDsaxkK5PD~d~zl>pMlj-(n)ao6*-pq z(WwzVed0Xqr+l1h*YY)~0gS-QHrexxOSDQ)^r4i0 z`XD~hraSPr;N=ns!Hrw3dY(M}{iy7FZp$Syze^#1e){A^QD(WHDDvAVEa+7^^z!mc z+&Xr)w(Rka=#N0+66?Wa(KdZAq)955PZ>z$RJByCuGX&Qh2<~vnMEDjk1TG%nI?OV7cH@| z_V(zoWpJdD2UUFEEmnjhj-6y`q#OSn@DX>I^B@z zb?0B&e#k>>yW+zGJE&G_H z$Z>ag5Y1OSRUN3ecB=udBFP-$nWLafu`G*Jm8?ao`I1E$Hm*!bp0?~GmJc=)c#-*g z^`z=*Ajy(UP`{wXZ1{|?TM}j6vwS1|lwc=$d5VP0=^Dkg$IpZ_e2-~-DI!zS>{^h= zuPrzee`B*>|M=?x8GOY{H9jmc`>>nv&3+w&>nb1s56~@Yt1L%w(A|4cMAWt1udB-V ztiM-dK$EvhMyJU=ui?k*7!|cImSqnxA`~^`wsbN}77bf}Uqo9T0ALf1`eZEs!e(T| zS;)I}=>K}%SQfx}+WVCbLpQhacPF_+C8iwc^+%iZ-|)0D57>H{J*N1~|Ah>xioOc} z&&U7%rL79wRARF=*}nnfgRvhJH*%`~`3isLYaN}uKF@uU|9SD|uRE6ZczPZ{{qp#V9eh7*GRX!Gl0&{4AAA(&NiIhr%^LtzJLGvHmT%s$b?4ja*pjp)H3X4gKO!J0f89+aO_7o`L*Ziz3E-xeV z84$$eA5d_(IW&Y3Gy5<*Q9hIYs{_)O$^u*tT>CBu7k+#V)Lx3=SG+kABgFt+AF~!~ z)h#T?ft43q;#K`yy`g`;!QK*M>}8-|M{x`0Ci(B1Hp5w!$MAWy!O34p9YDgrBGIMX68V41IY#|?ciKxdT5>XFDGv){1%rNV z{+SZM=p81fsTHWx6~!*te8K+LOzi3d0y2(+@t>3be)v+rGiDh1jQ+35#Zh&w0`zv_ z&a$*uF{(oLDP$||2rJ9apKHW{Ed(gHaJfgF}h zG;^0dYHensYpqtG)V6N(PDw4t$Gx>)a8}*=w<>Wid*IwaVL5C0C=6@D3AL|fILeOh(+>FI;?mH^ra1Gu|;r@&Ac&Urs zb;ZWFuU2uXC#eeG0Bl;=K&9y3coa216>x^LH6k9!^7QG~@}6`$CBsC$hHq7-s|RYs za?}z^G3mcgG-Fc#2s`*En^ivio6-BA@ndZz;}9)UOW(VHnrr7R%45Mo(6z73Y-f9- z0JQNYyqDmNsOzz??Rz})z7+n>EBnq&h*#m<@p-A;)RhksBHng>oUg0&Ck*(1dHxg`-*-=Qiku;L^<*+qh{Fy#u~}ch2wz` zyXgxWC+qR^{Mxr`yvN(pcFBn$g&hi{Q(f$B)>G?JwT=jaR4${mBhsPHjtI)Ny1Roj zyBC-|vo(otDlPrloR*u5kGmG}g3T(PI2BHQw=mi&N1cl5);mLSNx7-3^%1YsVaK_b zk>{TE*TIIt+z@`|`+X^qGxt@bX0^Er5~v%p*`?8%A#RhNJ}K3l$;A`4_3^rY$ih{k z2{BC4b5;3OkPWwdc`;%&I5Qi>d0D}x!!sCs57kwoSxV3|THk$Edx#9Wnv}qa79Doj z@NOH;RT8pxT2{hKI05`fbYR|R->P=We$Q+F*KB~2=(TSspV-ZI5b^~mm;seW7>T^d z-hPa2NiTf#i}Q+(PafkRSjuUB`gX_MDG-m@^!&2Wgy?8k&E*tFrV4ck)NGgnEiEd5 zV#Fyg)}h!=7x-kgMU~A|b^8GSiM7{4vGmsJu+gJAF9{D(vVG`alg&nStzo;|{L6)J z;aJn!34_&4tgz`Fm~l4&%&>e+kwyJ1J^>QoDIF?WAc^ewk#y4mwr zct>aQ39Lk9;5ccQs$$jrO1xheTd1p8<8=KpP}P7y-OkVGcm3-%-4j;gzy_DrNl$qg z`*Ty-$i8qU=iw*(5#b%_Z z$IqbfJj@iyV)qzPw^A+*1-UInx{7ZM98b|@rZR4xTy3$5AP?At7wdd}P9>H3^eVcp z1qDJJE+6sOZF}=N$&BpL2Q;5to!Lz^Y6G<^RlyK%oLJNTRN9Smj+=TrH8A;n=1$8D z7C*FcQ)go;zZ`edtD?qwcH;Q5k?mqAUU+A+ArJ@ptFj%3m`<0Nd9wapZvQ79PQqVz z6S1OE?z`o{4R%g(yL6WQ^BSp{zI9zek%b{0!^;w#-sB3bN(1m&yAHR_&S1V49PA2I zFCZHBeYAA${f<}jLi|L0$4FG78E7D0JBxK{I{l7xKs+$>cPrs|Erwv@%IFwK7u>pd zh3kD1oj=c32~Z~d&i8N;j`=`QX2Zmj{+FVQ9Z@H}sXTTIg@VGl)=$*h^;m?zjn-(* zJ-Fids3o3&D{6f&0S?rhr1JYD4$f3*olwsR9S-^iVQ!DTp~QaOmwXx7pT;iuQ3)bN z;q|L~Xv17-eWG-WxLmhMWO|*8#cH^+VbHBNvW~7Bb=aDDB%Z)ID8SjhKc?>tfOfO9 z9p4Pjnu|Llvv#nWVzmMcHjm%9sf)`0Lvp&>12i7gMR-QAP+%6uU?cj zUZmZ2O(7Kb26c?N?^5E;;B8FI80@sd-K12MjXDPf6^PEfwqb+En{(-#=B``QHl?iw zi=A~v4|wX$V^lyK)ohXT+0~BijHrvD>^9r!NzvO=aYDg$d3S1YIQra(eDVcc9_oT_ z_Z}4`Qq##DEQ772qsS_BTrAI^&#)CG3($yF=^}~ z47n(zUnB;15ZzTG$<4F>`3($!G1L`u+*QBdNr6S5>#c=*iAIy@seH!oQ2M(zmCjp< zCiNJJP`_X^zMpK3nQ1X02{%VXj6S+lI8oI#!hyL4>X(d5#tf8rC;B#ax~wRtptjIa zMgGj4KEh8AM{_iAypG!oA$F~EBFb6X>RhBfCV^~wXMqOSrDMBeqKrkK5g4l2gk5%k zzWC)*@g`7BIDX9!@+zAxuv1U%o`wVD2r^5?$xVpclD5`yh!I2S>M&_lJZ{$!u&r4L zWHyK*|Jk{6$AI^(vZ&W)cZki8pv#wmalz&A1G|;}lW^cD2BRKdka&+Tfi)9=`lF^V z9V*C)NSYkK2z2S6u)uOyU!D5hS^%-|p$=>3=NGV^;wEa9>i18(nUgmE`du(W6$n&U z1-lYgPY*evlc>rFFEQ$Wc3yx#E?%L*`WyZ~(8w|$erD?(ZjcMbG2n9@F)vlEsoZjf zuy8#nGNmZiYRxEhPR(pLni^RuC^qP9@D2(mJg!QiFUry+NZB$%!7eBA#N`Fuce842 z=BM?&HcSlO!S7KpX+b$6$r)~h9TQgrYu$U84K6~+X90&nt~oo>?+1ba zweBFGp%7Y9Njjk@U_R60+tdkdxMCC?=wRlB2J3{Imqi+~D(8yJawEeqnG_f-1a$i6 zauP|VQ3a8yJ{u9&yx3y%1a2^iGuALOQ;Ln$qG$7EN0Vmb;Tn%^&tp+?sE#JHJ>u>_(QLy*oBUgomvXZ&qR43>$$@>k!^%^jY{2<{Fvd>hg8R+zCPfB@9*`yE#K*+TZ+ZO!LWDlW?{fN;R;2RIl zOnn6Vu~-5UNq&+N(<^^bbV9?8Tyx+6szJiQ$BAPFP71}FvkgwjJvLX#+xn_Klnr_i zHZHM}#Ib|;{OgxD)WKQS$k;nGyLm6t-yc&>oW_i_J@-yz3Hha&F|zXYZRjalk3xy{4zB?a{7wjU^nq2d1TaH_H2 z7ogs3>l0F|UglkIk|P|JsQn#*Nphr-t%kS z4Bm%UKNMc##$YO)oo#P=_>S@d+W^IH@aS0LXUadEl|>@)ywI8dZtH|co54+#4NM0a zJhaQg^&jFV3MvEVmGr`^^tWy%iQ$7ErSXqHcm0Vb|9gHUO9NVe(w+ZbJSAgXe;^TU8hLAp z{x|Ifjt3t8{P)g|ztckO97pK1>+%{*PthcSK3)ZNzA@aWgwH#SZC4Tv{EbFc&CE6Cjw|b3CB`M0s9R=9DX^&XL7voFL0fwmwfK919&0=0XE92mbwGrv zE$De@p3?K7FYg~Iz#`Rp`s3O$kiwCzJkK4R99e_1ZXHb-B-WHlBhS8 zw;ZUSeLu1Hy9AWV>n;!lc!87#F#lk1HQJ$kT{F_FC1tDW=FoJoCqr)%TV}fEI@iW@ zOSnRY$N)AmgLhBVmil%*xXqIXNJ=+t2bV&;gB)Q5`dMONCZ|_V)E>NDo-LkeuHq$kgKo&p;NOs02IDm68 z!61ZZO?(*=E)hKiu*sY0b#ruCv68{Wz;paJF^_-R>*MRl^1yMb!l={wu&8`9Xhpvk z(R6ZZq$0LRiFd)>zSt8_rxdpFOzV4`)=Yuc)tla77}3}nK>3TrWW$@>wrYQ7`o5!t zom5=bZ%MQ4n_lNUfrz8CRdJ&{o1+XyLddwh^*j;x()kU2mU6i`$ZAH! zL-`qB#-T`(ia|szkXD=yM7i@#N{va^e3F%yvw~$Y*W7>lhj5Azo1W|DTz)f&^Yz$W~R5`x;(qejBcN* z;qhHF9av`SSI_Z8ETcsZ>k&>TEw1yzzVK(60&Z)BLPyaS4Og77WU@v|%ZUy_sMJxZV+YSa;ZSbnrodNe3(ia%lrb&hA+>AF|EA1iu=m3I82@0jjcZUH_vh zbBqvk4^l=b1{?y$CQ}|7@vgW-0=Rx>KU%l(D$k@NzYYU>0MbRqOyQ=(8h|O-?bS%W zwEo_i{lo83iI)W622qb~>P7kB^Myz$DxdukEM+%;XC0qeL*B1MqjWO!Y9C8d{%y%LwTa9=@NWoMt zhp3fHs zw!IdOWq`n2-9rgN;V15B`&z0)jP6jl<?zZA-fk3)VEeA8Tg94G0vAyIv#WxjLbaOi1SkKRKMJ`yqdEa zZI-BXP^I9tMl4Va+`8@+ojq~;+!YBGy`1YwWJ z{K1eiaRguylbUW&m@Evq|G56@!l!(F3Xq%GTOzK3;+PQfvHX0%qRO@V3zPL_BW0Yr zdm!$dD@2&fHE2Z84rbOrU}tmfW|GLNy_^Jc?xxxpuPgMWxROWE(ne((`?j8dTTdY; z^LH?~3SMWJf>>AIkxx6_WZ&l6WsEWZQkPmF?7&HX9qj3y{0gaXMGJm z=n;8BdKz>higKELQD{v|9WxO9$b&6Bw~HwZiTlW<^5(M~%J*E=D)*EOxuckMAWi{a z`NnbA4q`J?8gnI6HMtWv3|3|_Yb{1iTboSelVz?oP}@Sl7H4DX;Z@D*fktFRir#U0dL;Ypv%jlMK8QxS{ab3|BbAWZ|Iu1-zYA zhIlsDZSGi_xU1ACTKM9q*Ye@W(d7KaJVGZJalxBUD*g z{FbC*s_^hwkQn4%z?fiJrSJF>e5$^jZgQ^0Yip`7v+k!v>+aOph#1iVpeZ<;xTAA= z?Zm~(k|Jd?2RNzdcIqGZvbElB52oN&&L8=P5x)5qNx3&4Gnm3ZgYlB}MEoIS;~`Da zaK4XQ)84eM_s~m^^)Urzvu-!}twtuh>Kdzwvx(=}^nK~^)lK31(gi&UgV-!7d|=1b zvu>d^(a(1Dez}^=*k$zfd+ociJf8>KVKOf5tGroCeeIbM4%O+i&Ip!lO;(rniCSNw zQ!U-q#?B%urF>4NAfC{r^AH}nPC9HDfXv5h-G|+-=9;!kQeWn~Z49@I?p*tG&9>UL zy11USr)kfh?!C)!JNdB~E%c;^Qb^^;bD8u;=W4_DuxYFE<~7|FbJx`JSJAkd2fvX_ z%m5JJjy(Ce@FxS*Rg9iu^KwxVxE%=iydTim91yN!?*ishQ`bCC`AoG1(KC{xqdTI< zejSuUN+@w3i4B3qo_B!Lua(Lkpy_%)u@=f$SY-x|H=as1OMkMSsN5>vLT$qKMu3gW z+&uk*IKPI+_#PHRbK0ONDFyR=VerMz!t50W;WHP5g)^(mCj&p~@)V(7??*C(y(AEh z0sCVC;+^G$6hhb&g~yLe9psi2lDYaxCTuP78pK8xg67XQa>c@R#3E9Krhy#50&nEf zd0}<=e5Iav$!@jj5y?)8iCLyot%1hn=K7Vx&u{InyFbKHLZ?UDGnBWLB%)IkU6IF` zmsT!z$xY^o3YnbU(S>_Jv-e~u{@v>kg-Qe%qlHDe;y)qXuQ&d87ip)j?c>gCNM>BL z5UWkncNSANp!>8Q2l+yINuCeQ->O{xa#-ZLeM{W3%?KJUZ!9Onmu4JWAcw13ZHq2G z^|k(q_*@r!eHlM5S2}J-3nZc{4-l4*dk7#+^5!Na8NUQYQab?ENzU^90dDWKyC!3k z4`YkPzUK8_FHN?K8nSs;5E%2Yq03UUe5J|CyrqMy0mupYOuT`%&QqoPZcVlDYKO-9 zhc7NhziL?-lVa4d9M4N{0mz^1AHfAxE&C}<*2fnXDoyXCm;z0H|F#Mo03snGSpH^! zUy3DIMCQJ*9(y3VYw}rVer_;TaK5!nncYD%eof8nb}8a95$HlJhvc=oD6nxYVcy+Z z^yNA#E?B-dLX$-2T6IT%qrzdd<0l)Uo}yGj{Nog~S4`PQr~ z9kCpx);YTA+49l4ll5mg&NJk#IQXqUjaf{(zKx2~1Gjae|p zRq<06B*-NnJ~GL^cmas>0MoXHeUnSEPgg<6{2$-=S27+pA4XS@&`q75h67RUnN|75 ze9_)nHuE|Z-x+@-f?cjc52Y;r}IVz-9 zYv^P zmqx1eL2fJV2nFGiWZsVh!aHQ6r=+=^KJUJ&=K=Xl$9~j9v*%}vnLE`uY!s`Q?R{HJ z$0)>Qo|=Jhs+L44db?^2a)XDvTDG@xH=)F_eoE zCJ3h;1b8CBm{d}Gx3KAI0V{EgnMRW26pL%6!YuAV^GPdk38Gopr@f` z#qI;zldgVHnk^#@t@0dmL72lX&V3O^B7!2LUG)_YFM=(DvZn;D5dfA3U~R-NT4a72 zrP#se?IjGSs%ShuKRe-!M*4K?Ri&A`;^(i~zEH{kp?4@$vr#Stgtrrc7PLuSiS*(> zWbdyfcSI_rO=zLrjl7$~sxAj=1U9R0ZQZWiU5F${@JTMO%~1QT*Hu?SSLMDTm`cK> z`^nw4@6fK#T(R>zzplC2^#kYpK^Q&84R26c@de~I|E_Kp?6i7seEj)pNA?P@o>P4B zQt-%@+nlA_tcw+WIb-2h_0N`b^9?@HLzDQH7+Fsg0xHMGpEdVNDt|ETP-gVdksVbp zqnYI@mYc3?e0X~%f>LnU7AE2_Z`XWK0tiKWw9-i7c0UiWTGa=>TNs2eF6mI%%%>!} z06~91f1EI*-~K)+TX2-9HfEg|S|p%xF*6^(&P(nor3)dhl{2ju!Ku(1(I!8+m~TEV zdcF-v*wA4ycFOAvb`kB;sJFVpmiJXi!0<{7iPGDf0;Ce?+f|~gF!$Hc@PMLEe9B0i+?t%TUm5#&z-_t zLAlA8K_xX20rH$?eYJo9vV$#3DARuDtsuw^nBkDg?Q#vR&BqNf?y6!&}C zj~D<#vNb!o(3{;(22DZUbC7c92n}811<`wDDf^iT%B}ry%n)>vZ^N(YQug-?Gi#Az z;tz&Sr!+k6U{|%uK#2;sQ#XVcvCZ^TTuOVXgeIs!BJy<#d45$Y*kw4N9n5o|9Qge7 zE3`;myT<3$-LcQ_X0OC5sVkl+`dGJDCS+@?sYgX=2WDe3eCpwHCqLt}SRfQV2*V5? z+ryF_HD7y&a!pT$Hfd5sxJTZM<}(ISdYQLC90wJ;x2)n<5;e_%Ub zQk5A*+eOiPgxVY(ymV1}E6H%jcJ1sro{%ZdGTS{|Ih}gtr!1~;rZbmF({;|m-Hn{9 zq$$9e>?PEqJc7ozoBXWbG5U7~eeC6A^lX;s9ot^@~`2Ku%04RLF zh%odw7bT`hJj8eV+K-SRO{QBCgnccXnkYwo4S1ra+EeP!R;p1>lA^Exq%;g$QEfIN zUUHB-fSG_D2^r_=Rh;TnF-y5dQvn`<;=FBki}6W#h`RV4-iXS zTl!f{zd|T+ZJFytf#4Dd8ZMFUZraE_m9jS#TQC*qVT$5fp9=iRFy*z1^|VD7qG0rY{J79=JAUwMj44u0 zO3Vv$)hTRNZ|8t`1p0(1jgv47CU*kL(eysk?%KPJZ>e?h+t6$7%UX#^w3h_E(us$5 z+TArb7sXc0f0TtCMVGiiY5j1gbHlc|1Ebg0jm0!-641LlRw)Q*bL6n|XfG{I_ zh73z^Sb`5DB?C%z&J1W$_ndxXCKp$z^vYB1B@Y>S?C8>3m;? z)h#x%w}9UJu$L$znfGMIn(>RpHG0;T_G9O+9H?24xluF$*WeHnW6=oM)45J@CE>xw zGQivOU}fz{NOmFz@o^K^t*G~ZIM_T|AYT11i_ik1HzL;^_%jHc?}S*90ZU4&4OwI zPF?@!i;OY+g)sIX!9Ie1+VX(1EFdu^<^epA)#cn`BHv8fYPLF?MgQ}60k@sE z38fhp8xv*G8{jInpBOmpD1mgw@`s4PEy3Da2|)NZC*1n|a(Cykul!#xfb~oR_yE0b z2YB{eZT5u72I~yxSDk;-am{n-sLA6@a8zQD$r=^a%6-tZPN^%4HPCF-^|)cY3=}d^ zYq-V;xKzCn*nDaum8P-ux^yP1_VTl>(KJujs|Jn*{S_zL1H=mt&iSVfk~hykMgv1G zL-Sh-m*_lWhW6KMwI0@evTEE7-gCdU!5Ozaq(u$TP5N4g>tr@~VnPfUT-~P>NZUqs zM`7^Tjeoa;#%G>SE+mIpz`t(%wa*hH+xQelsdeT}f`+n=x5)!Sa`91k~T z28FR{WtXLE9gwB+SeTk?D|*Duop^E?h?sHABLGvt1JJ{$JAY_MW(MVEN%jr5cWjjE zZ(i8NUe?<*B>=MFaR<~-=sKedW$Q`u7ShF_F{*x2c67`KrxgvqRxe8d-o~`7PkKVynqNJMWx! zEp7bz-urA+zh7m82;-mY%9FwbMhl<=QiYRMhK<2J@~yefk-5eKz;exp%6NOz?vlSR znH_%ka4>H|2FSdIudr6CPp$csOCKyG>3@(%@^>wLleuzv7m12 zF;;7O%DUSQVyf;-5zy^7fnH^{j`q}b`z6Yz4xL0x->rDC)Q-VwLI6JJbzrnHwuKJ3 z1ji_l3AUFrZNeCA#-R;%{5cqw1ki#X#Ll)Gu^~QQK*Oh|G@jUO-7?mqsFoq2Yx7BJOuQY`_KBKl$bcxlGDMTL z!7&D+ch%CLai#Ei@-TA#OE|Le5kb`!GNF`(o0yxaHA{^Y`73ZRelyqU1jidE9Pq+A zMvxiv_G%QYH+*T|1~qMMu+hCgE~Ycnfc_FTd!4?OWH(hk?$%EKgHc9&q^=Jzu&BXK zU_4zMse=PO28@kh=4Wrs)WX%<9cyCQ# zVUkK3SKMRC08qy`*NO(BFUW*gv63CGheR+E5>+7@7Ga5+Ec7M^%+ zWILS`J3q({uarCqsvY*A$CzQ*9slA|JS?2^=q}K2^mF949TSIATgMS3>k0<)Q|rv~ z%56c;Fzx6NthN}AhmAj)4V>nw@uI1@+?aN8+u@WGM!IrdDq#AqfZt)a-fv8l^Tbmf z;b$|b9PvEfFm1tdu5&X$4P7GW=5ASDD)%;6hzjfg&QD&HE-Y))Rcb};A!VIW$>t_{ zB1@><^rx`RF=#GFzUdDffvZx*7M+PK6%a}escseXV@DCkg_6fweb9goBmpXf=+=_Y zPhTK4b>>QP8l+;Irsxcm>X{D4&P5|IxI@VG}ZGjsYnN z>#Bl51=T4{sBqIsy>jp-7x&|t_{Q(~ZxJ$qLx z$iw}0XY)9>^59qZ;sKFEC&X^?TRd#*$YY;kP_6pId^scc^!$|%R$1SZe25mxRysAORjkQR^aW-!y(vwf3>Q6s{;5?%Nw=p+e4Q#(AzIan% zoq&3zYh-umNYugsRfXgo>`axHCeUlorlabP(@R$we~eDLh+;$i_Zf!rs5*IgGroVX zP_wCPqI9+{v1gDHoLhxM?JKQTaQLp661`q{3kr*_G_OxoSa}J?*?%uk$XH^U(U~xN zi|6az6Avs5qDh|ZS+2wg*qK%g6x6AyS#oH>_{>tS*C+3bANuXa&8h;Q#YSmsZkJ4p zp8<+;*!1dU>T_eNG&1Yx1$MiEV%bnPw3FOCEA6q%{#x7lZ?26!+kk!Fx!J93I*3lZ+p}})E+bpB^^*45cKR7~T(eu1#@kOU z;|wv6VkVwcYu7ySL)RCV+tR-qC76GSsyxwhqh#G{XK{_j{$#FQCc8tUM*I2z(7;w` zjMUi7WGC2#@=QTS=M%O!ScL`7kL2~*^~6ar@M{cQY#4u}9l;*w0SjP1-}J%9{7wbd z4<%xdxMK!5)tT7K@Q&<_JnZHx{lJsZ>RC5&ZO@1$AHn^WLbbYMSa?Lh?0M4=ACC{_ z;Vv2*B1z7|M^rZaoqhSnv{I_jD5}T`WP6fPPT4|PxFk$VGrv2S{6=mn@ZDQhAre6ema+3n0@huS9pBi&7ft>d`36&a0k~ zPjafWN6*sMz$syu#duKqOvX~VOdqaihc>VdyoS|%G?-w>F;T|hR;^A!QK|F;b7jg4 z?rl;xA2sg`WUwfN_2*K-q27Fe5(zK&`uffzW|#StD^afe{?F%x2>7BIhj1T=-|xQo1_@5y6niPFqlb?*Wu5(hfe)8R+8|;y=Npxzul;E2?=(0E zcef(bPWwn(X|CET_o^YZu#9ugWAT>YhT%^c=WP|UcX@T=2Kx1QF5bs|4HX;44bM03 z990qOisupDnY~Xbw25v$x6XVQ+pvOi>;#Wro`8YF1UIhd4MXY0mabdl`LTmIM_p$I zTef1cbuWWogr+g2HyrXJIaTtg=Q=*^+y~Ehej2D$hWsD(4oTaN-2=^0A>`n(-7Z z=@Z6EngpI3^|O%aXR<-^-X5VMOuJ?lj5)pN^Z?p*hsVYlC>01uw%%7?CKZG5sRp69DfzS<+3g5nOaiUDV}o# zk(tluz?4Xy?!l08Ideq>uf8)At{T|w z@fQ2WKAWi2O``h!1)7f!g;_IKi*CoSu2wvxQGWXv^tI3c1@n zDZ4NORHfLeQHg|3XMQj_d}Ch|+wI21hCw}U;QWmoQr;n7>9fMLcqMOgFW9OY*H)(T znGE&M<^idhAEE{?(%~?78SL~Uxn^ohd71c4J^^xKV_tg7%CaIoZMz7k?UyNJJv1^8 zcgLCFL56bH<4UT8m@1j_p%2*>a+(9a8x)F+y}>?GZan4kg7L)D{wk&a*yk2h;z_mQWLjUd&i;QpvF}kv;JZqw-=E zQ4H&Us%-fA^U#c2g<6b!@~k}9*HOookaUih&*}mPw4C=f(cU>S?r>cE=G09ssjE>8-7D zY8v$+Kril4RJSiQycfoV6UQPj?vT`Xz%cCObuy6iCC`yFZb|t;gN-Cj@`@~%PbE^7 zqRw`vM>kt4GqzjQsApMMg|&Mk)43a8tT>j8PF4J7qg1}{kB7K@1`FA7E%lh)(yHV8 zjPf%GmB2rz(FqcJ_HY(F;Hy(A+vkSAdsM55##Mv`G&Jm8?@0Hc6?sU5{O&pgfD55f@3;^HQvPP2EvnPd5k;Bpz{L-)?ZYG__lM zG1>5PEz;>ub#fmcetatCk&C#r-H`(;iC~pmgL(oD(G%(XrMhWIOpZGNK?6blGaV9b z|F<;(5dmIu4eW0}IN3Qoq95)yXY?qDtIaW@fkjAQ-A79B9&RnMFdN^ck&545iw5-( zBHZ?#<*UUK^a2nmr@AQ_74^5s&)jUfNilwN>Q>*p z;*sxMTX1>`LIY@n&WGzP&!#`EHPHUE(`aJAy+^v1NS8p5V6K~e+dS`LQc~_X=OkZl zxd1-sdMCo9O-{eN+9l(SrLEMp(paYkTUDr;bKSsdO^gN>#$ArB#X;*2YR2YLL<)({ zuX~R%dV?h4OmBHH55+th5f z8Xfr$A+ddY!q$~=I0#O7@bZ!q7Y)v$>u3k$S)9iLjbIyh_ z2DWg>!zmomL_|Lc(U?Nw<-mIu?Ogf+ChC0pu@wYRTwQi8-x4+xkGvlxI@%KQKjkWD zOY_`gxg*i<$P)@mhxrdC`r?yr2tZ*j15#J~+3Y!vgn~KDOKDJf4OJ7W{ICqP%AB~1 z^Fp>$8xK^zz;>LE2o0lK57sE6>enMt9wc7h@gQSGeZyU&oK-{ zRTrwChGo5;QK$UF_^bupR8M8JGXIrKQ+ewdNTuK6M=^R{&FIEU}ge(OVb z52We?^JmHC3y*vuv$wcZd{>T6F7w#0nnmc^q@WaM7|=ddLviXJ-eLCf7Urw{(7|q2 z9ZdG&BLQ05lnbjkfw%U)h0Uw+_Kq6(Fxbv)xduyoO3CWBqdI?h+cjuc~as`3D95C*henU7w8$*O zcZ8|iq&p-}F>7JKv^c4q=T!SBw%S2{a3jP!qx`=V7}? zK3YJd@g619dmV7j*oan@$>buPNi_C3nX69>lmcW`~hHq`Rk%t=Qyax-EV1(8z+C| zb%LEb=89dvb<7oA)s^+!GStb6#5h{?GRDAAcQ3vg{UZQ_w&T~HX=NpWkvauNMe0bW z^(CH*RJo-u?SDs3EGw8`Xg@EEi0WVtsJom7nzvhjoo%m$pgYi)GLw1SKN)uZbn%n3a;(nhL@;)&IKvq1d52R*mY<@4Pt)|Ad*F;O7_Cs5mX>CTrV^hF2MS{-JQqZopeNkpBge0MsvA31IGD#XjXLH{l|+z5 zHI@jDWOPk8o@-raje5TpHP9a?p&@;IFZ3^bG82OHkjWYtBQ?K_%$5~bEo3(D|sodm5+-^+^%Agu1alflr0m zY3jD3@#CCJpMN6;+f0;GedAQ_e8|Oqpl9di}*3$x%vn0`gyGr<()drclOF0h<<6Tv!vNNU~G zI+2yl;C4%_x>OUW5%jJu5;isGeS|PP>wevomGwm=^*CQ?a;HFqRzA)%DtwX!=^3;& zT`^;vKU@HcbqQQ=0q_yfh@~J%WNn%GWnR&+v0*}KTjThK57=g=reg15=cG3r%O@?o zZTPvg&H2Yc`7t)nET0fp9x*b3>KablZw-4h{jdQJ8@#eHVk=GP(dF#KcxhJF5RcuY zwG(;y!O9?V)7a%jS7Z`}ZS#k5fF-rwPCCeyqM2=l!nrg2JK{R}dF8d-h^DR#%+10ZiYCsf6s>id)V0OJ4C`}pOkWPub#Kd?e*iO z^>j0!k#$umbnP7=JFg%YasD11RM{cvO?uK$A>_OLlGF|UShsPNRJXyq#%!qA3CruG zA5TK#Lep*vXe7OTZWW1G-455^hPiLG*In}Of!4P1LGW)5?rU}BXN&OT=IO%GPnxN` zcC+g{<=?)&^c8`LbiXJ!Hvn@Nx(nuB#;vzN*dZ%GLsZ*wVvp^sQO+zN_+{bc?F9OL z03G@SW}?W2w`Yn`-}o%%a*{=%UQ$LZ5%F)nbwza?Dg>UNnMHKep0XI!mVA;D2O@UF zXpjd9+pt64?F5_X{g|dn<&!e2iLzqf zj-t%NvhXB9+K8xzv?3)lZ;nyX*0@Xk2P;>%?5Y9WwziWV<6C*+99`O&oT0Xmdxe16 z6rxz~4Cd{_C=V}~x3nLu=h)?OMAkrcr(lMkUA>G|Wx6+NYw5W)HbV+yj(5=$lUAM?kLw64E z#Vw=$-{*e5yz5=-`SN^VtzoX2xz0J)K6~%q{_V5(?%=l;&Xu5&!a;d9$-0yKN%c;9 zMH8449#3m}D)g4{FbwLYlV%^#@HqW1h zn_K7G6?M+*HPSyQ1aFTo>B~fsf!{tLVeM3W&4I1e@G`H`Nnplx!8M0PWsQj2B#4nU zJ)J9lZ@=F@G9yVrxokwG52`7Jz}sVdVkcV4%}%oNYOd9bdONXo`s`GRTx7aO2WWVwzl@p3d0Pl`@X?j{dS z_`7sQ?0c2Mff?fa%}2Oa@_TA5@~!W3S=jB>9G3^QmS{{o`@Y4WLf@EcXhbzTxAlUO9uy>aLNp&?$QYmIpS9z;G z880x1%1*9l5jp?P0cMm=D{~p+@yVN`>$BD1b$a?_AvXSNaHFFtBkbgis-+>_ zODjDox~95K<~T~si##if=958v0G#JIap&hS2>tsdE(tA|&oa^cGfWE)`uNUImkWOH z?H5*4kXj#?LVMj3XEb(srT43DE+eT;3TZVHrd|RQK;?zi6gBWi6i{d^o>jibh$%tQ zHy*vAAqEmPMG)iRMp(L#%}8S!t5IVV*j&}%bG`Ojgqej5V`3m3%*?W!8440I6H}K-XhT_z5 z9%t)TB~IN7;V~((c8-aAsskn@BgcWs?<0Us;2DoRfaGJp)GRg-Wc=?A03+AiBbSbT zy7OHU;h%tMQ>$YjcTY^$y1&FVc!$A)Pb&FKAS7GCnu5n4<;MxPYq)-x4J01=S-+0# zmrQa0sw3&SHlKJe&iNHMd%!;_V(F-?nx`ND$>TE1)iXk^XX4}^U-$iNt{UNkBNsp$ zPRBhryGX;>@`s5k&Tj8*8>J|+KwR!y;MyEgdH}&)Z;>?8M&Sb&rM-R_X)7Z}6)*I= zXz0R9G#wD(0g5iy4l6btxTwb9MaUCZpO9XGkqTeS#Hxx1A_vf{#iw?nWi_ijVYeFZ zjkRB2ve@>$^zDiESHe#~(-gy)Tqv|6#X^cMojLZr}Y7l*F#g zB&&bkq9f2aJj-KQqo_UoB`gapKCP*KI^?)`-i>b8{xBezW(x8GE#02&Vw}9m3%sOS zA)q!Di{Dr+n(+N-u38{4_*=QzfSW9(mdWZ<1(U=N7wlCgU3+Ie+iw`KNEYfnnl)N0 zUB_P;g4z-bzh#C+oA-9}OQ5llOf+RO>ciB1U@%$~tr`}OIn9x`7`*bZ%6wQq^sl zUr-*!pqNI?q? zycZGeP^`N!p~hM#kWc7t(q-n-{;%JHkTJeC)_l{e8!yC)p-q&;EU(<9@9YaO zF4x-K4AfT*o#_;v3OSl!pAV>-N~<66Io$PqiUdic$bQtVs@uh8-9iP=f60|17M<_1 zh7w^@l@4X{O;706;TKVi%m;ULkLJ*yL(-33%nPaIBfMR(X`E-~Q<8=RtnTExL@tJ)T zQJeshx!^BvI~DD|IUBX|gTP^`4tdkVJ8b&KzSzYR-0}kNL(2vHr9a}4DGw1hzfsoT zrRYIw%z8*cdYFsM^>(%F>cakDu-(f6G&fjcuqUV0dHIcqK&=&*^pS}s-6H2d=gn2IQV5>A{IFBTZ7>hj_ z72J(jom{JdqjeWDI$kl?Oy%08Q;h5kie(P9gjy94jc&q`D1cD z>uE-`0+|8 z+RQY`Z$t?&q~_2J?mC<(DW|Ho_1&kCMMIfKAD(KI-`F}~*1HI9JngPO0^(zG%SS(K zJk!txza%;D=IE8;ggbv;Cx*99H?K9lq$k}|G-cp=h$0ZIP*+)El=tb4%wmTuCP8&N zyE2v(s10w1f*v9^gv_d@^+PucuUe!G#BQ(rdRjHJfi>Kk7^+YG(NYpC_^0a9YPeBy zaJ$i}nN6j2c1T`xWRW60l-|Y}nqbK#Jpa>M?*~s`m(k%p0SglzN|XU$?;>;#t4Cy)6K}(IF>_ZS=AHM+!^gt| z@)QvIjR%9CV3X(tZVo#di@8oo+9$w_Yq}xLG>@b!q5cjLUz*+c?NgP^A})i($F2 zlxELch(!;e;l8|ku!}|@|9a6#mfbP(-l<=Ym)P==h7-tv>AvYn{7&bdP$c)E|HTx( zl41dICz5eTm7f0XbkB@}=2i`fZ};(+l;N;lThKxcJ{4ZGNC7gVR*l?aPk6w*Y5NNC zW#Li2t>{P013QT<sn+QSi!EfdQwBV=hEZPc4Lg8W3H7!Fq|`PK1%0c zAD0>rb>X*_(;e7c%$00FfpJTdWMk-{FGY0)@!8SdID@6A>cQvS{G-*UQ4q-iWE7#X zaxT8ubQ7BClJ0Fk$d*OhqX;CG4Uycwolw4MX3YL@GMm%pH(>n~mYE;(@sO$c~XZ-Xk8QnN{ zk^n3wmFYC-kf&`=5l*-Bqb4B32%7&;IUGkKhFE(~-4|8u-k2UMHC2Je3+mwKW*SOr z(OD*vg5GxVI(LsvkH-((OI{CSz5!`fM44L*XTk89x4#>D&yg2(pB+T9BvQP3tD|L- zS@Qm(S#5%@C9J7%GYT_%W-}y|#^v&!>3NsmiPR6(csi;4S#)<^XmHFvo(I$R)*?sf+eqRm?ik zF@$smO$5XVmJ!F-#DA46M?xQI2RU|OJG^gk@xuPNc`JJIg+1E4TxwM*;;MM3k7Xn= zV0A2Q5V*rKyn(Gf{HR4FvA1Qg&x|B zc)8kqPCmJc&X`tebcY$&qH@9(d8w;*Tx*y{*MtjB6jC2KE0Ueoe5BYyi_jlrVK}#$ zWFH&SQPn9eADt;{#bA1*!@7BnE=l<}HS~uP1VQY^NvUesGLd`hn$ymEwc}z`a!xX` z1R*^X;)l|i7xyjFb;`O}UZ2u_!o(TocrDBKG5XQ>vc0OVs$+m4JhFS;ZZ zsY_qNJhbnCGMumtX_sa9nw;44H>3YY1h;rXJ#wPIe-Vn=>`lD}|Ey1X)I*Hh6l^>D zVK=udzj|6c@KadODU?qI`g(Ug$V1!oInlG@#klmwK(^2UTDYS#l-NMQEya1D>r&)c z=`n5o%*X)$4-G@`t)CFFpUQSEJ#`&DM!M?ouH;B07Fu4~H8V-bDUFN<{^rng;elF6 zvR**n1px`mNLtk>_OEP_TfP-! zO7pF9f}RE7G;B=q$na;rp9Kj7Vl_I#n7s?CjZNfT7Doz_y6D-RY}g{-0de?luDByh zw%iv;3{4=1i6Q5#fW3l1((!J}xVpB93{fbkK`_7% zfwD4}d^YZv8G^Xmw3cZ=ob>pBkn@o3+k zOWQy7eD^zkO(-jb_PBP&_}zh%FcEb@_v^MU<9y`2sz2>No)ZoJ~G1(7OndH8~CYcEX}F z!m`5eHAN)jm5(isq>3)zsr&62QLlb*gIe{+SA z9UL8*XlZlqj?tawC-S+D9+51(;-0FFNl@viB0b|g4U$H^k;<>s)IbJT#A5cdn_t_c z-I>K~HnZYnuC7c_*PO7`3*wd z0)dMk(F{vW?+xYguLYDVrCj)buGE~r*9u|I1S`ARIZ4vO$tCbR)A&A@jJEPNh z>Z9*H&8)SpQpPA0vJu{w=k3F0W@a-$za2RX(Yn5S>Qhq0o6pwRvqFW_Ka%jekCGN^l<23ApoSi9%#FaeNa5`%=b?gd zN1aAXdB(YB`1w_pWXhY#cup(R>-rDl0J7Z6!{E; zi8vsKu;$8!hK5Od7ldxgodTZD=mtE^<(D$?t)Rd%)Vo+TYlbLh%k4Y+z$FKw&08?M zimRKi41CN=8V@5b{jW}F;TIl}f^^}<6?X=_uPh>ix)pu)9M4Z$^2&^zVdIskxE(sL zUZr7>z6lB@c8)!)TbJN<-ivaE&5to?!q-TzLKOv_PF_a(eO%%l;9IM{f|0_j7xqXI zHe&6X`E@_Qt^9Cf;@%*kp-pGYTa^fDzVHdZyjbvo!uS?rUQw^l7klY=?EE;l`I?%r z(Hr;tD8T^U?|qdZ7$v1K@9TAbL?G)hlN$~%AF-Icr$Rf!N5H4*1Dt+3?C4OeX#uFG zS3oC-{c)mW{x>C+RtxvD)Pr_r=oS^NOpq4@GN<66=W&y`#2X zEdlgsYURWsEMn1=P*UFFU=jh(NxLku@bm7G0X`EhjMeSX#)I?N%a|*~gOVae(zMJ} zG*DK#*~h^&&s;9d4Fro!DG0DGp}LwyOH#>ZjN#8 zCf7IYL1P84f(1K#{wxQum)1r!M)7`WHYFp}*~yYnOf+{O>H~8#Nts;u2Ttkf8Ps%X z_ZjpeKOg&QZ$PljLM7+ibHRPvd0QeqOE0UTv%eqWC%ZCb2j7uBH{5#QanU8Yc}DCG z9Mkjg+C@P@Ia)gyXar{_JIbFf?rS06FUk)gC;c_`uhiE`sW{)W=@1H%dgnc<_X5{u zBxt13L5Shgs$x%}?G$8;;VBLOYsF};tI(7y6`gw1Bx%;&A=>RThh)zsi}SspDg82g zSZ6|!m;X_fCDhUbAWn&*v7V`Rz!#dYa#le{2zxDsi=-tFt7g`g5|F2t<3POqRtSR3 zr>H}P(3*r?n)!qBK&KFw~#G4Z!>aOx?CyoBHp1FPG$G| z2)P&v^;va6nchh{2HA=X{@5z?H8(|%7)vVimSe8R;yle{~A zzjdq|662b-+s7q@^C1+E{h0kts?BU^*SwpfL;P6cI^wff)IxH>bQr&<_LmPKMT}IF zk}0pDtc(~WLi&IYr2~pW%%T+YkGJ#tX48NZ6V9n8F8+I6{y1Bo0M`dQ>ze>#VhAepA4j~*!~i7FVnj_9u~>hN$d?YdrcCrFH{j2; zFwSe|=>N;)I^`4M!M~Se_@j`5LKSp*yex6nx~G4UL}uFQ$*(E+R!}M_X}&ylLy3uX zo!{FBavb2%MbBh{yK$D>5lb3~ovTrjtYW$}dAh%{d9k*yt`z1bSQ>cy$DNx9E)lA+ znc4aFocke+7b?+|#P3l>BwOI7WyVpoP*=LF7DR*y>xYFPuXE+v`Jwm`9>1q2IS=Id z*C`d+q)a71*P7CDSw4uHo~Qw9f8aLk9ojpEmpK+>*?R#RX$x~Fu5Ajm|77P{gfnLIMVQXS>!e;)B~fjqkwNC7W# z`Tr6~;2O5?H|GBk$hB*MJUhVut?B<3ND<%~^4)rb6e8va+%psNf2u^_EnRgaEiJ8> zgai*Qn9z@3-vMN^DLS2f@&>dCrD~%-cs#iPR1eok{|pp1no4``FY|_Z|xv<{pv`0`*KhW0Lqu+OeaVXSPmH)PZQD^u_m>rv)OH(lk2HDl)*n{sBp$ZKQV%=cJT0XW06FEZ@rzyF$W~?WTSu!Gu3Ng_ z)#sjDqyu?148S-^kPkp>#oJE_Gg7CCz z`jfE%pP!*XPoH^-dGCTs%c<`U4vrh6T`E;>`|9V`?JH$$qTDGyJR=gbY_n|vonT9x)Wf`w$d(A8bqMP&Oq_4z#J5=y|)|F{MtHgS~V#@t+ z3%dX2a*QiT4;OIwwlQc>2C+MG-1be!OMDhS$Y9(XQAiQ^$)Z!pBADUK3;WoNL7$zyl$^n zW)6uH#YJNhc3cm;!&lw3M5Ll50wZZtyl35Yr{)-50)CrCj$%^TQ9u6DF6~o$XS=nL zamVh5TFO$`SS&n`f?d-F`r!R^S4Ee-6mTLt@Hmcn{8YAy-A`S%oV*+d zYw;^>=cYl5TO1&l2xxq&)We&Vk-_>3&OpX_h|my9NyrYf>x7iHPaiQXepV4+)IV_E zD0cx89n4FbF5EXJj}xIHEfN@)I&hAHiRlhf`AX7@9gqHZKXMxdPv2|S@zRF427a;w z5WvW8JXL1#)3l`ZvD-`$kN|_5qdR31x!}$f(}zVMXgnBp+Xd)PIUUsO!|lQsodhVN znVb>9{qcucdbC#ewalc1%N9TcRkl->8hTz?hnqebO!_rcKTII&VK@@w&u~DY;AnqR zuR)?(f$;CpO|JRfSOYgTOxSQ%c}b6*_bKmyBrV?CgW~#*nMr%24U`pl#DfV{nCLHq z*eM6NwZ^8+BbEsJK}lwXQk04fzl%wzn$FA+Q^M$C-SSZ$?#OEvpd4?WKA)>hNlK^E zjRs;pm?%@ZH`8R}-@FlTRYLDls|yT=T_l~meEDn+#-b^*`hJ}$?zueGptssJ_$o^fHm~W=<`9SDdl-bPg~Hl1uU548&lv35qX@v3Qkj}-qqRnbm_QE- z7?V^yyUoS`ZOa(#lHjkI32_6tB6Rn#au7BtR~u=iKe^mM|C5ihc0X(!9}_sPbM9r^ zFuy*Yr!r0LJ@znXVu$&uvPR@8N?!t3sJGk`SBeOcV%(0zdLYqhY1n@8#NKKwKUxF% zgC-$R=)CHp!IRfx*?Orvea@&O&&Do&C>mc>WRha7=C)ZSeH2^0W z;=YoEtBI(h&9*l1ox1OW3Emt$0&K`9M-A@fMR!fs>X4BfMGvU2p(b68nQA2A??daxr+9}mo>=ZZui2{qC>_X@L#+J=jygRW=_zT8_eY&+p z8<6|@0U};QBu_?1nfc&8g?Lw!TSy7vdDZ zE~7B^3R|H$D9|Vh(0lqWW?sP3c;kyp1x3Ite+Vh>4}4-7T7K~a&LYp*(6AbFXv7Yv zax?YYol6vL5>ue#W|<($YLu>5-@nil9i!y_5qo6ecmN788b{O<_c-U~I51>3BPA#{9jdN6(qIz7rd)LRQXSs;^<%+6ZS(=F+zP~8zlZpzM$F{yM zHT6>XhRfDQi6CkPbF>jktJOJgFBgGV^#yKR--w_-;9gAJq*%d2p zTBa|_jYt>q6|JuQTIk@rHg?W4A*e?Z^z-P{?0F-#bOoBP2}SXgmbjow#v?+=;4`z- zv@DlKQYH51zpxz{C8{xi7|s?;-hUPo%J{8z7=2IPMn(6P&=<$ni@ZO>&`s^djqX=K z`%u4~^z!_O@5y8)l!h^Lp3c{-Rj$*(@$>b|`V2Mx* z$xKF9AbM9O^NPX>`;z=$KR4`)ko<+Mg6>E+N!w+n!$l1uhVM%xBlL~2KqM_~8H12> zvf2UOa0;#B)DjnmtaUS$-le4r^>Nb$GL7UPpkZv%Lga?*k^kL|Y@xv3jGI`gc#PPb zc=tQK#91l&Yp8Xg^Y2tKZr7+|NILie4sF;Pfbxvb?x$WN9Q7#amy@5;1Uy^vqYHZE zI=V*hf8{`&tMF|F4j+VnL6<}p0Op%qNT-oMLFXQtrODbP3U} ze=1(%hE`mRmKMzW-}Fxu!^`rzl&&N=;)08YS_v6ds5v zbyK8n8+y^g>bY@|vm|H^*nWdLBqXze~zg8i8H#?rk?HmC5>dad`Iu^tp9yB=P#I;`VoMa%A!2)ED`P>0+e10ClLS? z^L8mX{sVh*d;sVxqG4>+J2yi9M^>(H2yj8c1bP3fKw#yP1A;Uf7LrPY5ct1toGAf_ zi)8}><^Pc*thYiK_^{(BXq^zlP9`H-WY55^Z%TE`~3?* zBm>w+?LnjeG5Jv}U=o&6l1dl|dHnsgTgYHafW z%SpxGaXQg{35OCd>dQQ3-t%bn8_Z$J@9T5b);#T>(xAgIU2C|)XEjt>#F4k~J;o+Q zjbU1h@@DR_?|_At6^>)O`M)ypYa)ulWVMdQOewag5tGeATBBh0Pf>r25E4?y4muyt zagCIY5H(}7lfIUXy5tG%`ucKYjm!GQu%WY!z|^W?w`>99gL7XKsaS& zA3Zp{;kz@jeNpxa&ZZR_%tQ*uG5Pm|tgp7DagEM#Y3A3C%?9Fed1Kai4GKLvOpCo{ z^mR^}H(Tty6V_fJQX0uDfCm{NAufV=jxv-xN5tDFDxx3tD}aWLM@O1_tGmAMZTC71 z;4KB-36YUT%@>;k*L@TLf~G4F<%s3#XrRoBnA6hyn)h0JE?Ct?+F$|WEP=EK6L<7m z(em=nWc#qT<($7cJBQD@CcCl4ayOcIJ=^BMJXuYVo_!ACp5g0xv4erIT|+?uPOI(1yxT%&C= zyGDB&0)WqtN70n{q~7R4^E^Y)ZQfv_LEJGVN1)n#>iacY2ZXs?M&5dJ9*$tj2R@2E z?aCoNHL3%mHc3&`JQEFW&r>GuU@n;X9Ao+z0cBGwff`QJ0l2i*J7K~{doY%heHq@( zeGpu;s>gLdyKlT#?|kDp#bx5MNU`NN=b-vo$hM97Cp}_q$dnSl0%smHujz~$A~S`Q zBit*$eFCjW8u9c13yeBjEKSgHY!|2+|Hf)~hRc4~OanwzCUn45z8nY( z9Gu!LR8guA`dpRxSd}zhNC2Q5zQ}Sl+p!V0x0H-}JOecSv>9EU@r+O6b(wC&eOk&; z=Tv2I1z*vh!fw<=`xM?H0rb=DX!vfpoVYe#W^|93-z5q-qcd?`Ga(v}JQ$BmW(0t% zj@no$V^@bM@Ls#CqY`*d#n6K z*RaAIN&InXu6kjA2DP6;bPrxn>hAaPC@bp3VVRNGbw#s&rKx^%;p)(gXM;ub<$ z-bl*#vycmIGyGoiYjrFgr%vdx@8$8;u0bQ45fo@v>eR({p5k+k$q5TuwCAH~K1|jy zUyY~q51qWX?QI;&fBFgZC67^(YP)IQo3RfFUv9P>3seCGbURZb#U%^WnY>O*8Xc6U z_C}2cPWI2Xug=%5cF*PGfu7hU$RI|>gGc0z?#C5l1>+xDtP$2u$`^2>Kwl|92!oS_ z`{rk5^+gwX%iXCR?3N2LpKeK&%_8(&&bt_d=9l*lxa5}w-)vK+ROvUmd(V8WO%xMX zCy1zX4#Gp9j^z-yhycR#4>x$Rc$to4u;1I2=aDLw1NkuV-SACPZxf$W4IgaTjHKsg z{dnhzu_@bvJtkDNIjd&i`O^kNi+;V_1glT3TH#RXG)Mog=GQi5ty`i>A#_Q57(U>1 zDQSwf`$#t?MX2rC=so5tM&$mGg}h~CT#`tyA=>1v*k`3@^C6kVDFK|$DxkR9 z=?QV2=UIiK7qd}fy(AXO?M9o?B6tSli|)t))82z(3etf$3-w`imq0M3!LKzPC8)?? zTllC~W#H8MsrSJ*!zp?9fAmX?2ZceJH2S4aPMz9U0K_1!j;~!SUQCB$0|hU(fk1eY zV%?}98soX^N+1GXc%~%gU4lR?T>>3uO3gpucC)Ji*X=<>VyiyJ-4sRIoDF3y2R}j{&(hEy>_W{z zlg{6|H-hGEmc($b)QyAtR>%#R#^6(Qnp24clDmR;y?LA(iO*?Ux|)224?qDEKX+)j zX4>j4q_CB%_4Ra~`J@GtawtnjM4ZzIEJafmCG9P?D9%3zY5dE4S7$!<-hWnKyiz$I zxD@KMLmCLBl?^{Tge45qo#ecW2ys=%%a5H^Zx5}V0m7VXNQst@i7W+89?c+^q$WaDN_dUoMw7; zQUQcp7)JDOJplsoJ(*3OKaQI32jrzdWo^%Z4+Od|z9c2tY|d1d>(#r&Ml(Vky-~HQ zD+<+lQ1>EqY}L@B1g}P#cZ6kTY8{{ylqM1s;Z`yurt)4`BVsEptWd|Duiw6S$iH}6 zH8$2BCU_J27s|#41ZTuO@#qgA5yvCS2m6+L=2`-QCbh!QJ!2pmVnP!>CZV>ZqG!Wc zzL$vL{|y0Za&)mj%i=Fr>DNo&X}Mdn!PB$#6&MJ7_yIYFMaMRY*A)W%QS%K5BP_@8 zdmy!KI3p$Z&)MWZ?yo)aYr%lO4s-qGE&|_^2|ie*wM3-V{Bd313OxZFVtb(zgtYv9 z$^BYeKfM%VLg=zTcfF1q%Z-G=YKy;CCk>^KW>P$zp%$Gr2lu;1mc-r zV-T_sU%1BZjb{(XUF~U50dTr*GmZ931LsO00ih9wiiU<>cTv?~sjeG@5r*TNzWt8~ z!PMBxCsimE$_7aLc!s3P7^88OAbl3;iY`E?b2=vWNdArEr9r!1k%PsMnyHNaZ2~ReAYPwaE zjaz{?DN|ziS9uAKBkyCm{D-W(6_Td})RjC8d;QLh)%jZj!2m#_@(U0{4Dhe#J_=Mv z=djccRl3Qm`0H!8kRvDn=>*A9Aq>|)VC+tpp=sD?SEFj1@!)bgvkM5I0x|sy!_HpsoUxk?+9M)_8|Qw=tYEB=_`-tMbqjKxMYX=)X@sc- z@N63^S8!Qg9|0B@J{8yLN-L{)A?I-|U)u)U!=Q8FI_Ic6>>=9oV0Q*!TxOUZno7%HeZw)cXPg?Fv zxONb&CyM7{Qc`^K0*m$>D)vNx6XhGNU6DY0wjY}CpY5TQ%z7oIRkqeXY$n#K0BoEy<2IB*4(2U+Z_c$Sq8|#vdeSKNh_9S>>kUxI=iGrNTpZ3 zx9G;>UxU3)oDlD_C;2nj*ITo0=Pld%d*k;s3_PMS9>0maj?!LR{Jx8e9yDm&7QtF@ zRc2}Fv$+It(cwT>m%UL|5{qbenN>-B`*@LFN@*{F_jyG50rz(F9l*aTKiaDEppmbL z)qg|%%Kf2w;@c=eAZJI~tkiUjXvDxG!qPhn+F}nVo#%jN>HO@S9epDTL}ICf04uo( zE8l?GSzk5Xak+YbtPPNly)w#{POCQ@tM%A-EvBU}^L2Lf6>UHglUtd~;pRlg+0iyk z{umt{J+bQ?J7K+dhSckVoVWBUwGrqQB;eJf&SbO5k6UOlk`3Sg_Ws@_(3qHkMWG^s zsaz9f)*mpiQz!N9I`u9#p1rp~_D7p@W-0>lmS+8lhh5N*lm*cx0D3FByJSh$_>uA6 zXtq2t*SXWc)hc(aBuVpC2I?)4hE3Ql>70K-Y`I3B{*F*RvDhk zBpkMFghek1TyCfUNds2Rr_k+-6|v1jv{M@lK?gSW8-dDgsOo;hQsrv9+icO(&Vv zctf61k`1jwVyQF0DmK&_v<=TbU$cdIiqoD05Jbf*s(Rh+Warg+XE6DpVuu3B+>4qH zgONICtC5rFh&2r$^K6d2v>AhtXEO8=uYtmv{9!}=cPzVp8HoS*~CFOw>+mX$@SvGVFVZHJLg_K@LnXa>Fygw(c8FW$T@0hF6Z&K)lU&e}RC@^7tV=m; z##{UAP^)Opl*7^APFguI`-!R=qmrBSd_=cJivNW98h02F=^1^3oW-z&)@t*i?^~}~ zaolU+VWR~a3)S)#i{c&WtZD~ZhY@V_)e0mFX1fz&f(8pY)mOOzHH$?R79%uysgU@u zFhv_wb=D!zo@s)Fjug*P5Hr!qE)E_?eCcshQ*7RV%hv4LoR~Iub?o`m#3g)Q@XI6~ zw{9XXTRdWov?qu4jPmNmnuHJAs&uNX4_4SkO{PD~p?qU$(5mR<8<-myaYU~y}^3K%wGLGN4U1nSW zj997*79G4#jdxm%0pZ0--9d`iJp%G%JVtn1CtRvM>XW3loYXO=rnI(-w&Zjw9V!*m zrAKfV+10pU3qLb*v+ssw2cuj0bFBd!p>-r36p_0NF2|6?@%MC z5GfOD=+IrZ(PUp;HoI3$nBI=mDm4ay3OU$c1vUAO1LaLORJ+~!w1mX&KqO$LHZT_g z&waFd3n8CtbRkG>^?7#oi5-;8?<>HJz4k0-16ja#&SN(>rYH&`8H!h}kLyt3KwgK= z=rHwCO0W9(qvzRP>e=Y;3(1X>()LX9AY<}y{Ec$EW@4M6@6=h3C|&VuU*s{-CPk1_ z6OeLgd7qTsyEN905wL6A4Pja7E+}^5x*{gPctBv=9;A}Z@DTTnZCEUST$nEcv2eOr(Jf9X1s|=&%^2ZQq$h+#=GJsEq?*L z?Dww_L3kh=KDD}C5MEA-&!DorJ|cKpG0Ex`wkT@*FkY3^`Sw%ZdQ_({5{2+I>8-H| z-OXW>^XeVms_oAp_9z9r41lj+nPmv1vEp(0io&Go?I8Xb;DQsdSgJ~584rDUQmo?w zqKHU%sx%(Wt5IrC02ah8OM>?JCp$fmA{q!{ck&EKK$gu*?J+- zC|0rLGlYD&FN4vR7WOG+X&P31G~?GFO-;w{B!or<%3N~6u_;$fT=FSz`l6C8B0AMs zCyet>mRY-Wl7Yj0VURZs{L^275D#A~*K=oHa!3@qg-OIWs560#qN(bZ7Q}Hk-77y@ z-B_5ym7=Q~zo6kTO#O7QM$mOD(axU{9e_()BUI-Gkc$jkLh|jSelOYdvZv9mvs8I; z_5Q}n6j=f5m2-2VK(&Ah$OG+-;>ck|=TFNl1R1%VV>tuC6FMFf^AJN*v66r-4K;5t zvvfBs#e8Z*^dAQtI{7l}sHgb}N3trOy8U$%A0Wz>qaXBBAKthG4=7UA7~ZwDNw1VN zkI2_#^w>x!o8zO)S7cVy3>2k@NoC$z-I%wOt#C4H_`tO^^b^qXgIY$P9nr!WE>jlu z8TBB&imQ=K&*KdyV>x_{fqecSN@Yd5_~|$$kB5xG#WWBqkSx89g6BLy=sDTrhtxl& zlut}hB<8ih_~6|L< z7st@D$?XU7ERM=ZF9Oi7y7Gm(89*@VWt?0Q<~kOSqXB~1tJtXV-ZPhxZ@ z_KB`y=`gikNRgNRn86UZ8&F8T+#7Gz=&rZ2NAINm#DKQ0aMY-4FO;4{#8jBI#>{p1 zT-26)-6Mc8naY77Ft(^a0tme?FSg>LSYWJ(K8fCOJFnW*&r{B^Retj2HQ$tt8Z=hs zib{uTSU8pEzLYdM0|tm=5uaGuPeLo^$(VA9Qx9&IMBU-z!y7P`$?bb_!AB~`8fW^% zsf?AqD>k8>miZquilJ5=dlD*}xet8$cmovr?7J#oMdb0$51WUKiv`}S3X%2*$$HYL zn0h9k6?WAc()E6U*M=Hkr_dmyVRW=4T5sAB7}3oheQrU^$lYYL(-a!8aNFP!U$acQ}-Z=sMeQC{jr`KZ8DdZN$bx zbMs^#5&<12JwC|*dFb9A`tD2Pyb6>3nAdU6+e7?SB}uAhIg@^#Z@-$5g-MmASxR-0 z3#a{HjXE`Oi8JH=6BPh(p8SPE5)bqzxw%2XyAOhGps;hPa?`#GWXZt$wSr=_Ch=jG z&L4AEgAcCr74TL>3L@kZcv=MDuLBhwqRVHcs(Q_nUl|BXQlsG!t>+cVotqFkB%wFN z<>v}aO|>y`h zS>mdAKncm0)c~Xjtxlt7kdRR!5aaq($b9!fU6;@2>dgBVGBe8t=8{RXfm*h$EA?G% z$l-KvZB#{WO}%D}hNA|u>JiT=9&j%WJVH_fQg1k;>`JEx1<){*=?s9hsT6Dw!j9K` zMy_aGYxUUL^9l4PCkVEkt2cQ-Xy)l9TzfWi!^N9=r7~L2de0mb zE~ug2F;>v z>kZmO0;y0OE-kk%CdKr-DNs@`evk^T;FAQ-q(-yS(}Tqgahg;SR#ZwAz8{zN**fQdoTRYo4MB&~y2dUJM;KqDb5^Lmg^E!Hg~#&9Rro z7`U?Z1^PQDc{c#Zgc?3%$$BPb@hOpkLErJPoPjorO?DAb(v+0;tSB}9q{>ga&PyR9 zQ_zy8QMT{eL*kbu)E_Hn*CUzlZ;vz0G^zz}kq6(KQ`y}D??PLBVodxP)km+uXe2yN zou(T9JiUKvd5AYBS!aj;R@F!;kQ#+HSO#+WydA{pQMdASmfK(osHLYDHeuhC6uIC% zz+PN|yw}6hv>x+O;7MPy*L_uV-_*&kB9zVz1Y;X{20g#)bPA}rmJb;wUDWWLxgsjN00OyJ$P4Cl6d8Lyz3zuX8&lV^vG- zRn5|IW#UNqiO=(`k~-f1oxRLE4&eh`wG9Rue>`Hy_pm)#bqvGZc$HG~F_JNPTq96< zW@#9)oTo0DmLu{+K(4H!#bKmQQ2XchEIe#}-G6#iPdZjSE>7${`ij6f?>6c07q({= zDmnSk*#*!X2Wn9;kf0Qft+Sye*4u7YX9svgz^jHIk%#!HKZ4QL^b05;)TQH`f_J$Tph>4c5}zVioHFJXPZHadBzv8AAGz)BBceR)0ZZ-X z!?qj+%ZZ3TeES=uS0?`J?d1Ef<8~RsOtgk8EL3>CmYu9Aw6v$M6KEq+E#mC@0W+&i z=n%{eG^bBjQFAZM?+!=$vE@Nq!*zP^BD1kjWLiF_9soSmy!}l z04Jb*YG3fox7zar2#ti(Nq2W#OK2I*8d~xHL)}|PRng{NJ@ir zmwTX`e*YEeecijJeV_e1=jCEq~z4lyd&Arz1JaaCW zRvKPxnvWvRG-#!X3E^HzW}hrLIw?DskN!98SK!IZv#CimqFF9q9od0v?x%ih{Q9F;yZusO?2Ha$W{ zi&9ozLnY>%RK3Y_5_?Sdh4^D7^=vOkkA$VFX@Ibkf$d@{L?Bs0?Gq=y)mp}@Xb)F` zQhkZeBr@Kb(4ntXuF)_wQ&;zE%r_VvRrtQ*GMi>Updd#`i|z^nMSY;TU_DXPtB)XCr5E+l#hO?NOGeAH1deC--uhHV{1x`Hv$ zRn`pL*N;%(exZ=L_n|xg9s~)d2%~YYtEjQV^%sR!!yw|`^wu9=uhj0XeD&pfe~EJO z#7LHfv1_1Ik9R2mVoJRPD>f#`n`_pxh`stf`FbM-R&?hcn?_I9`{$F_+lYp}AK3P& zE37t$!4J#k2jhAQ8Q%li_&3sY{~L*{xCfZuOk}Vl{(|A(Z6TS)b?4{jOX)CE{-G3YD~C zldoLmlriL2Tg6KV2@9VlP1ar_-KB8j+(pprZ*UoRyYW%!Ef;huz2LC~{u zlHjHPf#Lpcwk;r*U7Ys>+x|bGf8e-C0?M#=|AR*OeSh!400d0z{E_#e@g7!Dz(ro) z>qxLAxc~a-p(w%AetKPf!rwvk?B;X`lYw)xWih z1nTm)sQ%$%Kvlxf?=?U#nB@QEDE|Fc5~zzj3=+bh9{jI?BoOXNmM<~c|HY(G0(E%? z@%hi>1h9a~w!TUI$yf0g>JGsM>Jll``qxK)io+1VWbZdAfBY9wgap*(|3dRcN~Vgn zadKu~sa?$vcvuV&lrwS=Q~zU})HAGZ;K5+8w`=j3;Q{rBf{A+p2DCBA$jGc&EjA1{ z+aH-U{MYB`x#~Wx8E#A;GldkFIc2RV9R5-cnv>s$IT{oVQ~h!sLTF$h4{H{|>!3az z-+KyZV1VH)%Rre@j_)+KV6MFNc5UIdCbv|&&Q)b6(S(7ZvV!VVW3qI&3<#Z5`la`z zz1KW;OxgUV>=y5=_vsf`cnfblK$Vo<{r7*LSR9C+R~mH5F1Mpaj!e|YG~Ujr`mxI- zq`!(_U*%qXVqkp#NA=yq-GpO1Z_K=c&P?p;`A-QKy}jlSd*&b9&-?MQ55V6VUudWo zzpq3ujVaK6=7Me+gKw89(F7gpnBGrZm&ZjvQfZi8nc$Eg)!_eu;!3N@NLjp8+QIS= zyMvORURp8=J;V%e-~FI+N3d1{9-U-y{oUG`)205+cIncl2#>7|UIY8AVYM&}g%FHW zijd*FHJOIQ{hRw&7%u2dr8#Me^iPS_wzk#r0u9@XZ&*3nUu)S-)pTwzi<D9~owTgf}D{HIRY7fT~_Pfhz*j9n~B>VwdRZX303B87{ zv^3=E?Yg&jk3N2+B_#}H_{=u(sXElo8zkWgk^t}Hq-KfF?;BKx{qx=v?=(W~3*%L+zZ+DKIlbVpkv?n;gEx0;MY zGLD^ELrdf&szZJ>Y>m+&GvnPYD=vl;5X+;`5A)1MK7Cmcw_mlv)ed7njTD-M`Oho* z#_2!LAyoDF;H5+iJfGOE(D+~-ws3dJwa32TvgI^z7nO3??uJSzQDt`ZhW~7?-b|tT zo!*>ZmM=dEF)?L|$IZu`$vX98@tfN#50Anv*PF}viZm z|BKO#;4|2XW!_abmlo8VHwA&o4(t3LJ#S^a>rDny7{!7*P2=y*6LchI9185O&OLq> ztjp&)$jmj_>?6aWQML%)cL&Tjpl9YjDjsDOzxxN~BN$*i?`YS{qaMf&!dj7sGr2nc zN|$U{;jWA1e&G!TBcm+Xv|ocyil5tRp_T#205h6{Q*^uBwAxj4EmXvyR2|ltYC4=> z^5`QC8IXiD0tW6#3P?k2G_w~n{6*2F#$mU%y|Ytt%GSuRx4&q>X(GY})sr;&dcxyQ zNh}(;l*AVi-_?)dZL1+3we3D+>t=$>n(eXmNsoz%8J-bV8eGZZxQ^h;xdFV6u}6$H zn@0~3Td@`Sk~jsoIUi%-V>;TE6puIVS6z2@#%mLmmL`B|;B&=sxVzixDc4>8JRXiU+ZO11)8bU;c~~RYsqrQ$!syTpLt!8W=*aLCdh}Rq1H^0 zn8#HK`EhVGB=jt%XQXL#dpH*b+t=UMVt$P-QjsYU5c0(5FfAz*xofGHnfVIQOR@`2 z$SkBjbw9q+d9_iP#D1qo?w5PuWdUp@&Tzgr7r|?9L8xbtIgc|~Z!wzB$hlv8`lDN# zoVf*^gqz}gZ+>L=cw-Rd=h`}u3xDXJ<`RPW^2#$!+!WZzQ|9-Gj+0#cP72G>1dtyt z19-4X*ZRJnQm!XLDW*}gCkp1k1qt?H+FDn=c5)taMcyBLN5Ed}xg(mj<@=-y9!p)$ z$P>5QyB<>hDq87!yT-O-sMDm`CqG8ooO0eSRKDOB54($y3yx}U7bR}LdY-h3S9B{_ zW4BSc&STij?BuX>OvNdo(~^39v__YyJ&-PU4n16_At=K(i#qLw&&Rv7A0XcrL3uhp zXf;=78c(wzJ*L_R?0MB5k#0R%r)d})WbDMT{!wBe3Ldo-F+yRe9JA-(O$qv<+nE>eb&v#-RUiz7Aj zl;~Xws%{%H^}$JVB>uHKk5d}@j~1xI7Yb!+Bj_7NY)MrPofI-Pj_14Q9CwwwS|g0& zp~M*^&$dIJf%M2Z5_sIAHEZ9i%V98+;1}bMG%Aw9&MY>#ZXv(z)n!ZSh%c(0WdE=~ z8wKp)wz-!y^C7samt`*bdhAhxsKAYBnpGPi>|0>--!E6OY5G%RTqGpLhy~MTP9A%tw>v#%QES#g>@s_aJoI^#zQTcw->YzMu1pRtLcq}a)gQj;+vWni^uIp zu*FO<3?8#LPt9DDht7e^`g?^pb^;akZEfTy&bOrhv@#f)8nT*lJIFRm_4_^0_JTh;%mP@DmK^_it7C#F0hbwtQE&7(Up|88l_4*e``Hh< zIaRV)3Ke|_g9Z>$o2gs(_2Yn^ywUPZ0i*SDaNPv*QKhTGAW?lC30w`4Lib#ij&9I=Dn8%$K^{1IL< z?*imAuMDEm^>x9daO983?@Qwf*B1d3<@bsXrA)m0PDrP zCAo(9!%P6v5az(j(fP#gXbbPDndhJh;50wQ(IAcWLlKRjgVBSZ@V*fX`fz(#c(OC5 zwZfM_Q!4o>J~e7UgHutsg~yHbtMcAdT^;=!0lC3se(HiZRo`d`EmR6{QHxoO;OrT- z)2a*2ZKoZPxw9H3OR-{0jq=u6O}UCrQsMi@3R^Jn~Mf|=AI>1O-wBeL$p4 zGHf$mWtq=!7syvFmC=hkkV|T*#0*Jgu{v>-@#7{o5D$)+0iw`-pX? zRop#NppAJ>5_HHDA0iMtzw2gzGq)#F=4HN4mGI9~qE$9JYx~1`@OO~sX}M^q`{m-o z(;mTj$C-R>TUB2&&@+Cg<;ePzYYYWxC!6)Pxbt>7feD92lKZVUo~!sD6Iew6oe2ub z2OUxzo2|h3wDFr6n}*~=j|zhW6{lyho%vRgmFSDVZ}Ad9sF*K2wtu_5hen$@S~x8f zp`5GvDn?vpmw+A;C@cu04X+>2Bwwhht~Ev(wq5$%QZ}0_I=e$5KOpP3Lk2^FF;la{ zv7GCpAAAwG3+vz!hpi@@I_YmG4TeaQH*ZnL`F+L@|EGfYeg3bPAtf|C@_^(tebAqC zY=RuD7ASG0!Eyu-KNXs%22SGC6gnmHaNd8N0d{u;i2H^@&Vi0l?BRZY&8YwR@00>1 z7E-^^^_KMEY=2#T^%d|?IWnv&mH$1-`s+Z7uydr>qup0-{f|HYb;$493nfzs$&>&8 z>Z0LJrOIy~Jy=1g5*rZ(B_iP99a3A-ROX-70r?B*>!(prQApHZb~t3?i-o-2wyBJH zSQ$uz$;d=nLt?`{nbd4-NH5{&x&z|)f1k7(X>kc!-~%~1xmHTU;o+X$m*SQ!>M!5= z-je_Rg&`KS>lZHr@wadD+Y^|Szg)CG3wmAly?n*#UVNdR694^8w>XU2nopWIrvp^b!$S%I+LdahjOP(O`x|^@ z{vOWXJxGWX+cSvH+soJhq1g2I;qNe;O<1#1xIN!RJd6QgQTEr%mH^t>e6^fJ&+YF| z|CF!Egyh?pq@_jwqw`aQjUWPj*o?pR=ij65Px(U3GtEyYHa6hlg5WGHL@xQc6S8Uf+k)%?zB6C~9-g z_USG|R<;Mtq;Xs(jU>lca%u{aY^LabK-RPuv?{sa6pynE%^FwACt^OL$#BT4T>8zr zHI9d$T`w;uYDx#FLsKGWs`k5q1T1z&H*9som>*lmk9(Wp`@Tl z=Z=^@e1vrT4cald85;)&)@rQ6Vq#5j--GDD9-_Huosu5SSg`^bZHeO~uTQ*umvC=d zuX{rmBe8irTosz~b@pr0o)p29*%~`lm7+I!U7S{v^x@&*3UyYNGz#ef2}(*z&|6%^ zDUl2s)0z=FBzyr;C>u=OiRyN1^9@&f?~@7NecT708j;V`HM2re^S{3Q)U)g>^_jBZ z^_TkhyWhnmBcFYdOI%rAmZ|;93hb(b3oRZ@z-Sa4#J|(SLf$W=W_X;*P-->s)9`XK z$#@_^qU`ZNIoijMNVccF9B~$Wa^fNGB8*z~^g13qQ&cjCfjQj3#3Uj%_6h&_Vhik4 zxlNhSh0Pm(+1-p}EJ!n!k#1^(-kdlOpyPW!b%8V_QE%1pm`8YE7CwNEcJoJ8GR(6IW?Er8)iKEp=Z^VVL|TeiEF>Twom|eH87J8goQ(X6xJ$+Tt}17Ap#8_S?91fZh*rT4Qiz-0MtMw|O@{G9sX^SHcqfHzE$6`SBG}QkT837F(Co(# zvrQdonda>QLaw!)sj~Qcw1g*_d+(_8m9^lNOOmD3c4a}i^p0_fP+7|4L}HmB{bmIbJgyOD{bX0r(#p%tkYR>Jt+3ASIA^f-47d^ z_qE7LhVOkkxDN4~B6(eRnda|7kH`Y^NEiRcn1a?evOhp8By#nv8QiTqaeU)pHD6C- z*O)+uqwPBA7_AwxrD#&i1hgrFvzDuCef{|$f|u%K^Gk&6O{gVA%h^h!y7zn2^3#0d3egitV!-CWgJMW_zoikbnint>i+Iqk zoWk!@c%5p6K&QqMBbjc55$j8LkF2*uF4Y{LaV1+)Kg?vi-@qs~B^7$J@ij2&d`Udn znbXpmWLYfpTPcRULf8bOMaQqCLoq6Xtk z3htEmZ8$3j8IZPO`XCKJW?p5YRrccT*0&5V8eQ zlQCv;n2*0B;he|anDv5})kW?LG@|2>L0f4F$B#~S7q5Y6BS2b^398-8;-+EgqRaWV zf!!oe8X=oTX!FF(bf2bHl8esw1>d0%a3g#{;=HQk`DuQq%lm4_TaD&Tc{YD~QMFut zDg^dwfIlTHEL)qg2RlCLc@4U(2B|AxJHTVf43jK>YLbNd zb348ZQPDFz?Z?U?LbkNPD(~>+qTV9Av?`5ZW{_jyJcJuRWWY892-szJ{O%Z!wj8f6 zRhF5RBA+ExRCZdq3(=RHp>cX9 zL$&Rw(tQW)(q)H60b|hK&zAa_PvN}vK*Fbq@CJP{dzEhqVgoPhj{FBE;zBHk?Ax&X zJkv1)m?b`pqbNFDmzsI(9-)`uy zsb7HIEtXDjQXCyW|C#8PA+8ST2>FG&YO00^@gZ4^Q{K4`=TePJK|w@HM0{@wG&4$1 zo=&R%h{NUMLzA01^m)$8Dw2ggX>ky!hNKn(vj!%HcXE{6PqgwhqX}R-Y)h%nS5yrX z9B$=ncygjU{Ior>SqOJTFwq7tOi+dH&?uV_RV4J3pjkkr^WrvCn{~ui`&)CkLm*Md*gh~+n z)*7(l-^JYzIXDP-l!ckl8E;2;jxld=zsT7K&trd7en?S69Z&%o6LiHdoU1XY zlD%`h^)->H54!n5>Z$w^^Wc3?TGX_*G=7b4bf3?8d)bhnrHTcQ*|1oiqvv~zkQiZO zj7SzJVk@sGG8M#jvXhsQacADRybfL{Wa9+inPGBdQ*FsM##J!7g0Olix9me4?h5Q6AdUo}57dZXS* zmewI28uiljnyx<6{9w4_$MEq+ej^Cpc%TCF%p|p?jpdFYW11Ozg4xP|L-EV&GN$)A ztI*y*e_ZK}_w!J_-f;}k@b)ZtXdz(?w0&1sVz-Ofc2(Ns-dy?p6~`O}_Aa@OWwR(mZ!O8E zD{NzjX;+G6*C!Y>Z}EKw3qLD@;(0eN*gEHj9)Aqfb{Y%po&l;V-^tEby>`o@vJbS z*{CFFR9ZbvA(ZkKKl*V-o)?=-jhRPPBs~*sxJjJf=9}562^|y1OvhQr ze?7)#X?}(2N#)MkQ?Xj8&JQQGMMl*t!!~AqG@2;eMTaf(~v`Pp6Vj`#>4<0$u@F3cRG-amqVJJM!3 zm?v)ye1nBhp;1ZcC(TH5v{0-+ZBJ;~5GhElI1v@@AJ7ZUqU-ogBKV@GJbyYXr;6Wr)KMd=#VZ*UJ)lGnK|qF2M@0P* z9Jj=Pfhfotm>_*dX&kG@k~Sx2#4D1w?V8BSmvm9$Fs= zcyGqw@0aK@>m^f!sRCQhmGN+uE}lfpPFm!%J)k7eFpjyqp>VXOU^z#-bV^e&ab{UF zIYn82jrproC;dRbja=?7I4XavL4spCi`Q%e+s3M!1|iLW1#gvzhw=A zGB7uM{AwMTQvHL_kxGM=)_2!W8O0MLuqmawGo)g!f27+~2pJo&c4Vl1@m>MDZ{a`vm~~hJj=Ogs?P1pzi_M z`Lh(iFGz4f2NGCpZFjMN+9BJiU_&W`sx4iYd*0MFplxrFB4EK3PJ}ET)ahx`+F} zKkuf6DfCCtTmC6Mn5$4$MdWO5$m9_%``i3ZfkK*^iB7B(pMiDU$AAFcgKvCeb#GDs zv8a$Ng=cE&MSq70lYsZ3I+5gKe8~_ajXxIhl7o`-UpynwTR#>T7gv}>6tIo{1-oLl zf|v(D!ope@k3|eRa7;cZj=mdSjpRz41&^cX1D8VE2RX1_`0KLDqHS1L>h z8K{*4c|;@ALgBgSqYvN$C91{yztjgfLJmWD35&&-bSsMtBcXqBXSh{=xYEzTx!y zTK!*YUMDZ1L$8bsnNxuy&5gT=wC@>5q6w3T592}>Mvp~41G}sYhW;Oj?pJGz_%;{8 zyD_c>DMxf4xME|G&@f5wP@J#q`AHs6$zpfQgO2dJt>^uhY|r-^9-Y!+D~jGrG?0*x zXa?bqg=&tBGBhu3_sFak8X5f2iK5#@(Who#141DZ5E=hjjnu!;Zp|klL&J1*0X_sU z78cfY^u@&m(f7=xtO*HkWXp0!3-qBuEPH)Q3GgGNlc%GqwePpHWfp4Os-%DJ(9I3T zFr{1{WHG(kV$GmnA(ao^pRS?q!wexf_K9RrrBZi_VbqGAxVtbeHJAlPm!pJdTIY+$ zv-hCyKQ9St$mZs=sS<0c(!4_-o2k#ZM=SeHpLZOM_xbqU^RJihZmMQ$++M!nG^4h$ zvDqVYcsiRY$GtuAc3B`giBQ!lC5CCdiLs)WJ&GLf!*?n2=g+;3{OKX@R+7HgNFhws zxuj?|xxbjUHzj8gGg$STvKr10MDRt``B+?p6RNx6D;K`s? z!+2=U4iFicrVb7G**(<@AfJqA1e{^O>I{89ak`gO2dK8>rz+e+XX|bH-1zdikZ~C) z^p^BW%{$)kIGj`m65!HBtIB`%a%!+Yj?UL86Ar|sqb268I?r-AUh^_AfhGwLR zLDD}jEUMo5=Ez@4-wj`rHw%5awb7^j<9-AYHR~L-IzyAdp=d;mZSbMr=zzqW`9=3K zt0@;p|N5ij6pB7GfD|mfms-K=!I%L;C$i(1{%F`PhrUMg=SJh#TMDCmuVngwU~KY& z>2hffbwJ{6*iy<3l-Wg0t)Aka5toHr>m@@O}R{$PVXbxbJ6W|cx zwcj;Oj!r=}*8=8@q)j&?Md|V@@Y;QcxKiYRTn`Y%A!aVD8TD=NwntK*d>zSj=U3z` zklMT171gFK~gY@dhj-Nj=EwE5QrHlYj@0{>KYg;Ra z%q4Q6kW-di$~}7_1+Wr^>-A>C6>f4H89kM9DZC80bjp$E0mUr-ix?Isy%1bb}ZNaqef}fj6At!x>V&mTdO<;sxfchhK|UTW$~(iu8!hX3M`k zqSIDtpgP;N(uzyIzV zD2~r8zBFIGuC`QchM8f1b7@8V^>pmA2U($*#*Do=c{Mx<7JxwmYi#sb0dotV>>QZ= z_er^CJRslCJsL~ zVUpOb<;YN;SwC7cS*VTh?)w^g9z&xu@H&)qw89@@H4D}jaWDMPB3E9@b&n4`XLrkUU`TgJB8VqBKO3|Zk6o}68dAi>XLq{6;O97U#whz|_6$6gfv%2>OG_bmaoBe>BY6GB zcrYoK(ps~LMT_GJr(yd0?Qc@?T;>I72q3Rkf!VtAy(uO@U!olPN$_^<15NMsRG#2> zCYPD2-7r7_Dp^$MX0(%S@)Vi2lX%W7#f|mVbkz=h$PaclvQD@p@#96|{3&b3q-lqU z3HMtmnfWYhYqyF^CiQaZY4-hRTGQuYO}ATUEsCA0?ziM7OgA75SvwWHdjX(wD;#E# zA5}qZi}NGv0}e=xgyU8UBx)TQm00jV8_-gRykmAdqMs^uC>ym_QYFD()WX~wt4bK< zBEznSX+9bKI5J#dLVcryl=%dV;iez$s&2-qfx&IIMuIl;(K02IY;vy#?p1q@NY(`$7|UD%QL2ONH<) zr>i1~scu1gaiB5?KNK&UrMa@r0tl^zSwmrGKD_WM_LkSy<+fkl{1If76k zwq(q)3_vkgsy~&hROYl)6JxY;W`8tU+`@V|@~UT;!(v|a+~YWJp)RyYqfQ%0&`mm) zTbG<%BNrQTpH!mZxyQW7J&niKJWcOF8I`jj8?$h;#YG?pLQ06=qA89p(CCg2>9e14 z{W?0)t4v#Wb{PW_2@^^{FGw$Ol8d2N7C{KiQ;nrQ_>`f*65FQeqV_ctZS|ywt<*kZ z`zDln@F3ZIGQH865WEh8|4QYTDKOCFZkg%)LN^|MKH>6cBL)G3WK)*}@_6@ZjL>15 z$M1sR8ElCsGt?*cLm(#jS*Bc_o6OzC2H~Bp$<=kxkVHmr)T(OH&A1zV1sdX0@|PGB zBL{|8JsvmB77C~m2EkcRGEbc>x+>Bq2`7r?-nq!=9-s`N2H-P0YBwvQF2nI*A2f{- zz_%*rsmd39Y^Xcm)=Pg$|LDL^_&rPn{=%RN1_x2XbBizN-%xNFO+h*O0z(rlWGFvA zwN&mNXP~HdL$NI~n(Ie5&%jrD3<;mVM;Rq;(HtpIHGyQW6eKyz4?iU&B5?B!J+kx!}GRs>cxv1L2h^Z2il10^zR8rR$n0*1Z~Pn>Pa~ zSC5h8yV%6vC9oJ(u7|qHn>^vud3)>Jd@Wzn8bC@HQ)uHttX1o%j!ZBla>BvKnP0$? z{1h`ocWDW$^J`9-9?{qH5}6299RU6wOE#+ey+i{OA~qYH`))Kb>9`)p36MJPj_JfR zy)9%4tmIWQY9>fNSqjS9z&o2Ekz+W_An{`|Z$y=&)p1+%SRJUkqk8IkqB0G_>s4 zh{`Z{gIc4|GY~ib_hFU@p^MPpTagJS{i{6}OmrNP>v~XO>v4_W`R8E18|t!0I3xLt zw#}K>g_~Tuf^?Q$(q=#1i_W&#rUbHH*Br*(@aGeFre3P*aTs{n6baUmiKl;FwJ+(w z`1SckO2C>vY35{(dCpqR+79PPV@OVttPPQV@J3_;%B#DkV(HzF2GnyClQ~>xcM1G9 zgVHm_Y4G)rn7?&KKDWRmbJ(BEtE>1idg4Sf!tu>oPVR{%(}$+CLeAzb{80DOwJ6qQ zkSG9dxtOk%a)`gSGOl-yM})uG4yp3b64E_>=gbtzxMH4ioGi-A3|WMIL8xSEgJ=^= zk9!JA30sDn&_T(xD$HXuM>wLHrJ418iD?LU6i8e#*o|hTJ_+TQh1=rfF z`h;7`Lcp^xx)?M*A{KunIABOBb^wQ*JeTDpy&{!dgq|E~RMoaImR7H+V9^K&SbJupi?3vU`^_uho0k#=hTpw1j_L&OBs)+X=};ZZi#n&Bj*F}l9P#9O7r(%=X^l2R0sC;& zQ#0M_dUZlOZ|~np+AZ$oH*Hkf`9WU@2fTY@hI0k&(DQ<*n1uzCu+@rO3!g2Z$}bib zGnq6%%?8>vFMbm9iD;Jn?ee8mZD9Y#rzn14=mcPk=X)+1dM8FRsJ=lb=6)MAc2sQF zgN4&NCHq074B`TN$VTOM6e4S6K33@~i+6eA{mS^wkXC`D8bmW^Z{bXkX^ zTZ3&6ZdH0ZK;S-w;MH3tk~I_%va)`XA~g!m!BUtT=zI`SjbmP{-Ca+EzSUDBUhufL zvc1_H?&upM|5|Nx*lAA9wk{ohA3h?S^9^OCsfclNuz$h%SZBRQEA2fe9Ra<1jZkAE zpL`=*%S$#|<4M-}+l{(-L7#XyDU&`A0T&&CxRWm@ga-jcJhuH*D}egU69dM@VTVm$ z%n0pgfM?2>g-TSHw6uR972>me)rCD{k5qJRmR%TcUQ}lCkco+6d2bF~GR^BEZ5uRQ z8%B;c#l6a1jjb)VtfAhUtMdVzpT`R9RN}O%1?hd+nGOXThfZa9y{kohqPBVgl zF#7s94S=SsXb4c$Sx;FxxJ{P;ilv?N*bKZc^1q2EL-8cfq z7CzXTtVn_%^>Os!M&ne09Zu%ZT#WiQ)?N>VBfY&j(v;Q7-Evqy_;l7oO-YpXP-YR| zODGj_cq&)M4mb0y8)ii7IR5ENCrtig$&f0C>Ft9+-7;WX8Y~bBTqvRVB%}rBUY%sZ85@iuoaNJcn&#GN;T1ifu~(QeAZ@~K1j~1C-ke(+ zuoak(PU>KpR);Ldu1SZDe$F#QhR$2;{Zk2mm zt@zuS2ztwB`+ycA(k11AC5M2qniWUymHwAw)^`)2p6e3VpWj7qV4fSdb6*~ecx-E$ z_h*Uo1<@+g)up&+s3eK{&sdwAC3cm>H+$Uec&vqZ+;+G*W*v@R05L+Vr%iXSXjOC4 zn_^{c4_n2wIinXgiz#qQ9d;9L*>4SScpCXzb|Wn%bnk8LWWzHgLp7;mZE6m#Wmqj zU&CGspG`0QUHmM0yM;)XaOCB>&cf%Cp-qPm3SZS%9IvCP5+PG~ z;$eE3>PO+svQ?ANEeomnlRk!@=H4uE?_F8EeL;&1)+vXc=Wn>OkGb{Ml0tg{HQivB z;%u_$T&_2tFShP(mNk;x{Ju%useQ{Ra@?)Xsf{zlS`UA@QJZ^WEyTy=NsAyB2=9D7 z>!~|Lf6kgH{l%Sb)T4HnmWoBA6|;-u%hg`y{adu4kF>b@gEUT6Y*VZczzzclQxm=A z8WY6+JYTd2dp&5WAvG$r*%`6&Lr1{9s1tI6)m#okC^5{2Kq%F`rmEUE2xW0N@RVy=Ku6wZIWv`gS9(5ztadECiy zKR0bPEndd!L|G8Mu*nZn64Hld>ZSq2PxpNf5QxTJP$$xy4+ZbFw4<7 zl_?m*kv^T%vFmbH7lN8tFyU3?0P$y{GE4G*viOL-KFXK<_9Usw!lr+Me}%?$lFu)8 z&B*0^_scoXv?uNh+5|H)HD3hy?<+kvZE$@E8sD=V8sdL&*!)~=TTJ8>)Ug+uf!V@K z$Z{L#uHX!JVYD)3)}-pC?xS7%pTlC829>LRRQpDun+*x!+<(h{}bBh@diNj#P1mS za1X@B-%z}eCr~XAVsOuYauv4l0C>;y^{E=lA9MayfItC)bwtc35x>PRf1ei$X%+RJ z$`BVUe*B-+g2qnG#sc{Kf7!AJ)EBt*?*A`iVdCpL|8*#Y9Z6KwN6OF;V24m+4wwaA zyux{c^Bd9^;zgpMpptOa^lI;)dZgA_0f6(oGqIjJu1Wj%%Dse9r zpAU)h@%eV;6-EBsFX|T@4XFMBVcV%)m5@>-7T5CH?ah@_X;KacEj?APlD;{$(_g&= zh7=Q*nDQgiN78d&T1;{22Ryxn%o70xz-Ddt_ZoC@!&i3(5UBlB- z!=GP$s`$nKbrXUXGay6yu2lYk@}r1R$;Qtg#$x~o^98J;tjNF`M}S1!LySr~K<5P= z{zLy2e={|Dc0w`pK&?zkKnV+Dz3~1M_Y-#W|3(x-sqhHDWBoTC;tU7Sj0+^ZXc74u z_2pZ{zw&w@{(7cYS+=wUR*n64_u@ueWaCkaR?5m=+H|z~Pkv>U6B#tXV*Xn zz9nL=e)Rx^-UCEei1+F-0#2*2E80PVU#(^sD`e6 z=EMEg$akZ|oQ!s(|DgX{^$u1aE6Gd7>k=L(Xa7s z3k5AJ*aMx^O#3gzB&34%Ok4>M8Ud%T*moJKujErls6{xb`@J77Po27Gb;Aha_4sIjIJ48AbVOjgo-Y zd+yXfY%K2s#>bB|X}{QiMGQ z0us{20yh5jQD6-0W+*C-HYgPDxtcOO0D+w82MaDm8bvKVx%GH$Tw`A05eA^x%VP-Cx2gLH$%_wEQ#m7HuH&Cb-WqBnb9p>0p?s%ww zG`SLrEDC4gc6A<}IY5w?3aQtP%2UD)FdBbH?DGiK#~|e0`gMcH0_EqaaJwIijKLM%eLvH$u7U9OVp=_)uXvuieN($K?jkJ zA_9)wS6zd>Imzn+ZTwyWw936}m%e#`voYFXGw8ojmW2y(Ip0I^Ar0WOoT-p~ zZjcFvPyn5IvEW}ma|Lk`rSTZa_EVlNN#X17X)L^(Kq#nWDRnMSNe>*#<5B8fU}twJ z)l-orfjq6OO7urXI!&Z0Zr3%9MF3UE#8iJ$52W#Y`It#FZHGAw6grSl+502P;^YZ@ zwB<}y_@{LuJ`85W0_D2sK#BdPtEa@*WJ`d07jKp;HurspqrO^#J|B9ac8&Rmk<%Y? z4T<L{Ah?fQv1x4UXNqP`poI~cqno4cKQSKohZ{iTz#FRDjTla{wQLshbf>=c zQujIW5Bvai*$7a&2V(vkf1awWQO$t7e>2h_2sx1S_Cs5Z#}8gcDd%@@t>&G`!>Brw zASO!Wb{}>`>6hY^NfYTn9xix^cRX5Djb3U+fOmOn!6gCY_#JJyZ1^xg!E4|NS7p2{ ziBWZKsZrTSvf^NkCZ~9Xu}Nn%plFVI0t7yA==@;n-T?w18lB8dW%_okdf9cZStNww z?r8{>0u3s>i+>t5U1Wek?WZGfg36|jwSq9t9X3`DH+K{ZicghHasM5?C&?4PlmqFz^Y{TLn zKhdMIHHs?7J9zkE5O!`N75CVruQu-^HrUTGejX3*KDRyDw5du>4Po|Ry%KoQc<{}0 z7ubE3|CyU^J>_d4JPA2B4ANoPR zQ*Zk5YQ7$bw?U=@LUJEpix_?h7rIYx@IQPAYY5c3+=`hN5KVq6Mc>%mx^vhmL$IZb zBKJWm0b1ddKzV@ZRG4)w!7RQo5nmIHm{Sem2uK*9Wb?(a=kxGYc=9WqK&=sqFvrmd zxlJf+oxmmd(5$eApC`bP4c_3tX+* z;c_a_mB}q=)AWu?{yf~+n{(OY<%xLl4eDt-U2&Q?F}beLDg+9qZ4`5&93om{==15K z+RR6tf9C5&#ac9;o3{irBrxI1}yW#+c6`;gU1~=Pc z5Q6ekolU6Dk#sE;2G*|FTi2&Dk=JSZZP&Av?*`(%G2+>+I4*s2>Y0L;(s}!D-N!7H zCI)V5gf?x?qNK?!9A#d?b1!)(f!&VnCE@D1!P@sZLXW_lmyfX9Uw8`pyeHrj{*eHo zn>UDk^X{ylN1fUF3$pIu`^L(foS3pfbuFrSl+gI|Fp0FzR!>zaW3|+LMY@p%*F`hf zPngtK7(01~st31XP}=RvHmtG*^g}Bzq>0e3_ghZ^efOh}mb}-YB8|zd9dT?XO5dJo z#i#ihb*L@?9pGZUt?bWiK^Rokp;Y8F|Y&6mPM%c zT)#z{^sRGl;&T_-P_dUU2}zC1M*)pvO7{!HVn5+6E*&nBSYTxjf8Ds1-Q4JzPDO;SE@0M7HR0KCE?%-YEn4H&<57cosdl#_#Lnv~FQ|Khc&rsMTAfJ+7!EG)&zvx;}WaAacv&tEt0QV^2h!(?Do8qL{z=(_x+=`Nj z?HCucLaV3`Kj*D^WWY)B@+y3{w&&IOT$k&HfTCpj?rqJ!%gs~pvHhga)VMviInUI- zH_CcPPj4Y#-NLo25qD>g!3_jm_3UxAfs9HKsXP3X0S1O?M7;~r=f;VR?j#@Zr~%08wk@=Hp{w^aaX?P9~o2#fG51dyH1w>u1yiET~LJd=iUb4kg|I`v=g@3wOD8j<8KYs|PSvkbH=e_$VWcQ|H&PjEj($5AP$H@VP4IBQE zXJZY1d`dZyy+?6XvM$H+bn*R2q-Sr>7#IGZ#$>_8a#ZL{wu7c&>nQI z53$gSsfnz(XTCj6%$E4Qfe33VtrjTj{qf2Z#w5W~+^!OWc>ZxQ&2Sri_)TCUCwW6` z6VMve5PQ%h5LcgJoj~1wux#IH%}2uO7f1&8yHjtm6kgOz6jJeeNFg4YPj&I@;7S(7 zP^UmJZ=Tew>$&Pi5#}f-y-sYd=~Kn-XLp7z3*tgpaLEVblMkA#^zgvcQ$J7MP{)&X zh2(u&9i=rq^)%hqgkMpmS%Zcrl5*MZY)k_?={}#HW(XVZb-e&B!*R^DTd|~+Mo)P5 zdW4-Mj@Wdp@~$P_2s>Q@A^M?UwdTZe`t)3aU{nr7H}g%X`sd{v{DD<$hzrkJzm~Lt!`E7l%f@t3@x?i-}iqy z8r<9Dl`a``bQe7qxN9N~EBthl*ec(fK}M=asWANZLD24}Of$KcLj8?etuA)H4LWej z`)`2Vw3u~KaUU~Hk)j#%@OGwn1ba|xS%`H&PlB+l74DTd?2GciHRf*O#$y|4cVUH` zQBN2*$`HFsD1b+E4^XXTnL2??_PTw%1L1%H_+cI^ z>`YhMO3~k8LL>>tzXLYLMn1$thNv1!SEr}eH3=Z6g61KaaWk+t5+{v^%FwliiqN3x z_LsQ1AH*bSmoC!}B@i4J&E2KGX*Zm3m4C5;rLmnjY+O`q2u|a|un&U>OnYD@ub&v6`8%T0nAAK|5 zCd2mAma&#FLkl&j2o<9lmc|4d%R{f^Cq!yN=lZ6259aSwqqYZuQFcz#>7i$GUNrg=x~LQhq36BXz&B>wBUwviz>-!Fzd1&)dDkLPFR%}W+k3C)z!WMaYs?gFTd5fqCs>SK1w13d{=)#C(ur; zpIS_s4PO?YVJSNCQ~9AFB4MWwBdSUBLx;Bl%lA&kyPng}e5DN>tS1vzLmUDtLeN9b zIyzC`M4r_T3yChAk?sjR(8Ve5{)?IfXYo86*z&~LOelI>{n-VfN z?zEN@wKs_|7Ws#V|__`j6fyg zb8MP|^{F-yR%IX%T*^pK+gez}#1fyx4`-J)9pz0WD<0E^Z8-WYUf+Mxx?ag^EI^}p z6iAHihbwP&JGYIu3&L9M@VmjSRu{655>#<>YnRwyd2}CauTjJjki9J!-%3&gN;30I z0*0Fm90ujia|GEWopmc)ntWO|d#qa0mW~xhi`jFQ-`rRgwtb?#Ib86fvZyn3t$!Ag zRP@zqu!S=v^}*k43vzflQDv@mPJAn1Zv<>g7t*uHik>0!A^KDUI2R>D#oMSW{n+Z@ z3@e#C&e@I!)dvGA{bY(yR~@}~Nk6;#e!aUbxqmu1N)w;xzRm&5nu~4gCnK*=mR8inbH2E5yygsX4opJ?&==iOU7}Tmq)!!n5>$Py z6n^>JMydI7XxQkxtCZ*LlXZ^i9b5Y4Bri&$+?XrkauLXhVYa-xFEMi^B7;X7nRKMj zh)3w-MNdo91GH~$7ceRo5_mE6kWBKU)uFm<(DCkKYh#4dUiI+TpavY!cFSj>h0XI1)1rwbbHu8EJURUqh zDz20;6PkbZ5anZ*p=fo%Kr~C1@BGrTWu{NsIoBweToHi^P9u|w<1O&lEPc9;>Xo&1 zDjsTf&Q8y2%#JTZtmF|M{6rAf@mJ|9UJMGEUQIEfPP$3LatC$=+PO3DG)Aq_a1u{# zE9CpQC>XalrOy4jkit^O=hync|HgW(QgIw=k|eE+SCnY(P2tDh z(!xvj6am4#Otcc<6c*2601em>L&$OAS{gGHOxTj`1lWAJ<=cUfl=MwfBcFnu#-5AI z8t-iL5|@iJyW2>@!`21?7)M7U#=!ZS3Vtu?vpC#YbOu*nT4;KHnL;vdc=hG@tepG# zknuC3U&l3DHj&UD(e&gVQ#oSF&}{Z!3HWq-hxhqGV{6%?fqQ&ImNpaUbT~Km`h)W& zf_GKCb6s=$7|&Rfa#!H2y9B21|faCQetKX@Wr-RI~O{Wkt@{^hxdY-7jx5 zWh}@j`#6m$KD0@4)&#V@tPtx?-q`-a^QVexgD!VD)P~kS2}&6LioJDTbHZstaQ}gS z78TV!PnOj{X2x6tL}%^Euh)4;OP*5KypP2t>w0Nv-NXp(y?i%b=eT z5bs^IGRby$=X4!`fql=K&34$>*oQ0Z@f3N~-%o0jCYg#OJRFj3)9k~uAK1Jr?{=-o zA*^3Kk>=*xZZ->0HTh%svyzwTzbO_7reYM24!2&I;e&KEe8-Q7%&urQJug`vgVHsOKI$iZ`HGkkGuy8(rcWBP)$=^o)U66(V?nn3etGHc; z)Vb|*rlS-6lRf~E(N7VL!AWuc&@|8g z6bS@TzPOt=`yZGQ&v5?FbeRx`TwEOXzj_#$vi9ptbOF#o1N)@5sTcS z0JaQXZsZyqURB(|^zE_ZKn3ltJrexNrIE+tw^<`^sOTlYb0)jv9v(2Udr$yq0)8H9 z2ZI5>!4y4)6~#ikBSmXJG1i5}f6q_VC1Mzg;OEbN`emubRbDe*x1$-KYYe!56W`cw zN8f~TzFs*y7fo-yi`CJw9|er**UF&(9wb;CjI=Qaey?y;$NBlSGO~D<`fna;gig^& zIUD9m#v%QCYm~!Ey$ahk@uCC}oc_`XL`hBDexmMZJhsTK*)5re|9aH1orq`qD!0xH zj|A8)kfVa+AIYt5GyvnzuClu8{Kh5hEEEI?V7FMPc+2gp`;CDDI5*ub&TXFaKNp|h z=d?8~5I|CZBU|$RumG`OH6T*Hnc26u=qH`<|HRloSkM;d28uWOg=haZx*yLL=T;zR g$M?r+f#BaGWb6vKSooe?1pzN3eRI7hI*w8Q1Iozfp#T5? literal 0 HcmV?d00001 diff --git a/bin/get_minimal_test_data.sh b/bin/get_minimal_test_data.sh new file mode 100755 index 0000000..577fea3 --- /dev/null +++ b/bin/get_minimal_test_data.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +CURR=`pwd` + +### Get input fastq files for the minimal test + +DEST_FASTQ="testData/minimal_test/input_fastq" +mkdir -p $DEST_FASTQ +cd $DEST_FASTQ + +echo "Fetching FastQ files..." + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_1.fastq.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_2.fastq.gz + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357071_1.fastq.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357071_2.fastq.gz + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357072_1.fastq.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357072_2.fastq.gz + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357073_1.fastq.gz + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357074_1.fastq.gz + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357075_1.fastq.gz + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357076_1.fastq.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357076_2.fastq.gz + +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz + +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz + +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz + +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz + +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz + +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz + +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz +wget https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz + +cd $CURR + +### Get reference files for the minimal test + +DEST_REF="testData/minimal_test/reference" +mkdir -p $DEST_REF +cd $DEST_REF + +echo "Fetching reference data..." + +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/bbsplit_fasta_list.txt +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/genes.gff.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/genes.gtf.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/genome.fasta +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/gfp.fa.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/hisat2.tar.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/rsem.tar.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/salmon.tar.gz +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/transcriptome.fasta + +wget https://raw.githubusercontent.com/nf-core/rnaseq/3.12.0/assets/rrna-db-defaults.txt + +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/genome.fasta +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/genes.gtf.gz +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/genes.gff.gz +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/transcriptome.fasta +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/gfp.fa.gz + +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/bbsplit_fasta_list.txt +# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/hisat2.tar.gz +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/salmon.tar.gz +wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/rsem.tar.gz + +cd $CURR + +NEWDEST1_REF="$CURR/testData/minimal_test/reference/rRNA" +mkdir -p $NEWDEST1_REF +cd $NEWDEST1_REF +for LINE in `cat ../rrna-db-defaults.txt` +do + wget $LINE +done +cd $CURR +find $NEWDEST1_REF -type f > $DEST_REF/rrna-db-defaults.txt + +NEWDEST2_REF="$CURR/testData/minimal_test/reference/bbsplit_fasta" +mkdir -p $NEWDEST2_REF +while IFS=, read -r -a line; do + url="${line[1]}" + name="$NEWDEST2_REF/${line[0]}.fa" + wget $url -O "$name" + line+=("$name") + IFS=',' + echo "${line[*]}" >> "$NEWDEST2_REF/tmp.txt" +done < "$DEST_REF/bbsplit_fasta_list.txt" +cut -d',' -f1,3 "$NEWDEST2_REF/tmp.txt" > "$DEST_REF/bbsplit_fasta_list.txt" +rm "$NEWDEST2_REF/tmp.txt" diff --git a/bin/get_unit_test_data.sh b/bin/get_unit_test_data.sh new file mode 100755 index 0000000..8cd6ead --- /dev/null +++ b/bin/get_unit_test_data.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +CURR=`pwd` +DEST="testData/unit_test_resources" +mkdir -p $DEST +cd $DEST + +echo "Fetching unit test resources..." + +## UMI_TOOLS +# extract +wget https://github.com/CGATOxford/UMI-tools/raw/master/tests/slim.fastq.gz +wget https://github.com/CGATOxford/UMI-tools/raw/master/tests/scrb_seq_fastq.1.gz +wget https://github.com/CGATOxford/UMI-tools/raw/master/tests/scrb_seq_fastq.2.gz +# dedup +wget https://github.com/CGATOxford/UMI-tools/raw/master/tests/chr19.bam +wget https://github.com/CGATOxford/UMI-tools/raw/master/tests/chr19.bam.bai + +# MultiQC +wget https://multiqc.info/examples/rna-seq/data.zip + +# dupRadar +wget https://github.com/ssayols/dupRadar/raw/master/inst/extdata/genes.gtf +wget https://github.com/ssayols/dupRadar/raw/master/inst/extdata/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam +wget https://github.com/ssayols/dupRadar/raw/master/inst/extdata/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai + + +### Resources from https://github.com/snakemake/snakemake-wrappers/tree/master/bio +# DESeq2 +wget https://github.com/snakemake/snakemake-wrappers/raw/master/bio/deseq2/deseqdataset/test/dataset/counts.tsv + +# preseq lc_extrap +wget https://github.com/snakemake/snakemake-wrappers/raw/master/bio/preseq/lc_extrap/test/samples/a.sorted.bed +wget https://github.com/smithlabcode/preseq/raw/master/data/SRR1106616_5M_subset.bam + + +### nf-core test datasets +# sarscov2 +mkdir -p sarscov2 +wget -O sarscov2/genome.sizes https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.sizes +wget -O sarscov2/test.bedgraph https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/bedgraph/test.bedgraph +wget -O sarscov2/genome.fasta https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta +wget -O sarscov2/genome.fasta.fai https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta.fai +wget -O sarscov2/test.paired_end.sorted.bam https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam +wget -O sarscov2/test.paired_end.sorted.bam.bai https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai +wget -O sarscov2/test.bed https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/bed/test.bed +wget -O sarscov2/test.bed12 https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/bed/test.bed12 +wget -O sarscov2/genome.gtf https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.gtf + +cd $CURR \ No newline at end of file diff --git a/examples/minimal.yaml b/examples/minimal.yaml new file mode 100644 index 0000000..395205e --- /dev/null +++ b/examples/minimal.yaml @@ -0,0 +1,40 @@ +param_list: + - id: WT_REP1 + fastq_1: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz + fastq_2: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz + strandedness: reverse + - id: WT_REP2 + fastq_1: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz + fastq_2: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz + strandedness: reverse + - id: RAP1_IAA_30M_REP1 + fastq_1: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz + fastq_2: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz + strandedness: reverse + - id: RAP1_UNINDUCED_REP1 + fastq_1: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz + strandedness: reverse + - id: RAP1_UNINDUCED_REP2 + fastq_1: + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz + - https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz + strandedness: reverse + +fasta: https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/genome.fasta +gtf: https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/genes.gtf.gz +additional_fasta: https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/gfp.fa.gz +transcript_fasta: https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/transcriptome.fasta +bbsplit_fasta_list: https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/bbsplit_fasta_list.txt +salmon_index: https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/salmon.tar.gz +skip_bbsplit: true +multiqc_custom_config: https://raw.githubusercontent.com/viash-hub/rnaseq/refs/heads/main/src/assets/multiqc_config.yml +multiqc_methods_description: https://raw.githubusercontent.com/viash-hub/rnaseq/refs/heads/main/src/assets/methods_description_template.yml diff --git a/main.nf b/main.nf new file mode 100644 index 0000000..fd40518 --- /dev/null +++ b/main.nf @@ -0,0 +1,3 @@ +workflow { + print("This is a dummy placeholder for pipeline execution. Please use the corresponding nf files for running pipelines.") +} diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..a8b8d2d --- /dev/null +++ b/nextflow.config @@ -0,0 +1,6 @@ +manifest { + nextflowVersion = '!>=20.12.1-edge' + homePage = 'https://github.com/viash-hub/rnaseq' + description = 'Bulk RNAseq pipeline' + mainScript = 'target/nextflow/workflows/rnaseq/main.nf' +} diff --git a/src/assets/methods_description_template.yml b/src/assets/methods_description_template.yml new file mode 100644 index 0000000..24ca59b --- /dev/null +++ b/src/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "rnaseq.vsh-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/rnaseq Methods Description" +section_href: "https://github.com/nf-core/rnaseq" +plot_type: "html" + +data: | +

Methods

+

Data was processed using rnaseq.vsh which is a version of the nf-core/rnaseq (v.3.14.0) workflow wriiten using the Viash framework .

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
  • VIASH
  • +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/src/assets/multiqc_config.yml b/src/assets/multiqc_config.yml new file mode 100644 index 0000000..0ac2fb1 --- /dev/null +++ b/src/assets/multiqc_config.yml @@ -0,0 +1,164 @@ +report_comment: > + This report has been generated by the + analysis pipeline. +report_section_order: + "rnaseq-methods-description": + order: -1000 + software_versions: + order: -1001 + "rnaseq.vsh-summary": + order: -1002 + +export_plots: true +disable_version_detection: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - cutadapt + - fastp + - sortmerna + - star + - rsem + - salmon + - kallisto + - samtools + - picard + - preseq + - rseqc + - qualimap + +# Order of modules +top_modules: + - "fail_trimming" + - "fail_mapping" + - "fail_strand" + - "star_rsem_deseq2_pca" + - "star_rsem_deseq2_clustering" + - "star_salmon_deseq2_pca" + - "star_salmon_deseq2_clustering" + - "salmon_deseq2_pca" + - "salmon_deseq2_clustering" + - "kallisto_deseq2_pca" + - "kallisto_deseq2_clustering" + - "biotype_counts" + - "dupradar" + +module_order: + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." + path_filters: + - "*.read_*.fastqc.zip" + - cutadapt + - fastp + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." + path_filters: + - "*.trimgalore.read_*.fastqc.zip" + +# Don't show % Dups in the General Stats table (we have this from Picard) +table_columns_visible: + fastqc: + percent_duplicates: False + +extra_fn_clean_extn: + # - ".mapping_quality" + # - ".MarkDuplicates_flagstat.output.flagstat" + # - ".MarkDuplicates_idxstats.output.idxstats" + # - ".MarkDuplicates_stats.output.txt" + # - ".genome_sorted_MarkDuplicates.output.bam" + # - ".genome_sorted_MarkDuplicates" + - ".read_1" + - ".read_2" + +# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml +custom_data: + fail_trimming: + section_name: "WARNING: Fail Trimming Check" + description: "List of samples that failed the minimum trimmed reads threshold specified via the '--min_trimmed_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_trimmed_samples_table" + table_title: "Samples failed trimming threshold" + namespace: "Samples failed trimming threshold" + format: "{:.0f}" + fail_mapping: + section_name: "WARNING: Fail Alignment Check" + description: "List of samples that failed the STAR minimum mapped reads threshold specified via the '--min_mapped_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_mapped_samples_table" + table_title: "Samples failed mapping threshold" + namespace: "Samples failed mapping threshold" + format: "{:.2f}" + fail_strand: + section_name: "WARNING: Fail Strand Check" + description: "List of samples that failed the strandedness check between that provided in the samplesheet and calculated by the RSeQC infer_experiment.py tool." + plot_type: "table" + pconfig: + id: "fail_strand_check_table" + table_title: "Samples failed strandedness check" + namespace: "Samples failed strandedness check" + format: "{:.2f}" + +# Customise the module search patterns to speed up execution time +# - Skip module sub-tools that we are not interested in +# - Replace file-content searching with filename pattern searching +# - Don't add anything that is the same as the MultiQC default +# See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + + fastqc/zip: + fn: "*.fastqc.zip" + + cutadapt: + fn: "*.trimming_report*.txt" + + fastp: + fn: "*.fastp_out.json" + + sortmerna: + fn: "*sortmerna*.log" + + star: + fn: "*.star_align_reads.log.txt" + + # hisat2: + # fn: "*.hisat2.summary.log" + # salmon: + # fn: "*meta_info.json" + + preseq: + fn: "*.lc_extrap.txt" + + samtools/stats: + fn: "*_stats.output.txt" + samtools/flagstat: + fn: "*_flagstat.output.flagstat" + samtools/idxstats: + fn: "*_idxstats.output.idxstats" + + rseqc/bam_stat: + fn: "*.mapping_quality.txt" + rseqc/junction_saturation: + fn: "*.junction_saturation_plot.r" + rseqc/junction_annotation: + fn: "*.junction_annotation.log" + rseqc/read_duplication_pos: + fn: "*.duplication_rate_mapping.xls" + rseqc/read_distribution: + fn: "*.read_distribution.txt" + rseqc/infer_experiment: + fn: "*.strandedness.txt" + rseqc/inner_distance: + fn: "*.inner_distance_freq.txt" + rseqc/tin: + fn: "*.tin_summary.txt" + + picard/markdups: + fn: "*.MarkDuplicates.metrics.txt" + +skip_versions_section: true diff --git a/src/assets/optional_file.txt b/src/assets/optional_file.txt new file mode 100644 index 0000000..6462a73 --- /dev/null +++ b/src/assets/optional_file.txt @@ -0,0 +1 @@ +Optional! diff --git a/src/assets/required_file.txt b/src/assets/required_file.txt new file mode 100644 index 0000000..b4dbbb5 --- /dev/null +++ b/src/assets/required_file.txt @@ -0,0 +1 @@ +Required! diff --git a/src/assets/rrna-db-defaults.txt b/src/assets/rrna-db-defaults.txt new file mode 100644 index 0000000..4223356 --- /dev/null +++ b/src/assets/rrna-db-defaults.txt @@ -0,0 +1,8 @@ +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5s-database-id98.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-arc-16s-id95.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-arc-23s-id98.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-16s-id90.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-23s-id98.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-18s-id95.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta diff --git a/src/bedtools_genomecov/config.vsh.yaml b/src/bedtools_genomecov/config.vsh.yaml new file mode 100644 index 0000000..9ec2c31 --- /dev/null +++ b/src/bedtools_genomecov/config.vsh.yaml @@ -0,0 +1,56 @@ +name: bedtools_genomecov +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/bedtools_genomecov.nf] + last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d +description: Compute BEDGRAPH (-bg) summaries of feature coverage + +argument_groups: +- name: "Input" + arguments: + - name: "--strandedness" + type: string + choices: ["unstranded", "forward", "reverse", "auto"] + description: Sample strand-specificity. + - name: "--bam" + type: file + description: Genome BAM file + - name: "--extra_bedtools_args" + type: string + default: '' + +- name: "Output" + arguments: + - name: "--bedgraph_forward" + type: file + default: $id.forward.bedgraph + direction: output + - name: "--bedgraph_reverse" + type: file + default: $id.reverse.bedgraph + direction: output + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/chr19.bam + +engines: + - type: docker + image: ubuntu:22.04 + setup: + - type: docker + run: | + apt-get update && \ + apt-get install -y build-essential wget && \ + wget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \ + mv bedtools.static /usr/local/bin/bedtools && \ + chmod a+x /usr/local/bin/bedtools +runners: + - type: executable + - type: nextflow diff --git a/src/bedtools_genomecov/script.sh b/src/bedtools_genomecov/script.sh new file mode 100644 index 0000000..f6da15c --- /dev/null +++ b/src/bedtools_genomecov/script.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -eo pipefail + +prefix_forward="forward" +prefix_reverse="reverse" +if [ $par_strandedness == 'reverse' ]; then + prefix_forward="reverse" + prefix_reverse="forward" +fi + +bedtools genomecov \ + -ibam $par_bam \ + -bg \ + -strand + \ + $par_extra_bedtools_args | bedtools sort > $prefix_forward.bedGraph + +bedtools genomecov \ + -ibam $par_bam \ + -bg \ + -strand - \ + $par_extra_bedtools_args | bedtools sort > $prefix_reverse.bedGraph + +mv $prefix_forward.bedGraph $par_bedgraph_forward +mv $prefix_reverse.bedGraph $par_bedgraph_reverse diff --git a/src/bedtools_genomecov/test.sh b/src/bedtools_genomecov/test.sh new file mode 100644 index 0000000..3075d06 --- /dev/null +++ b/src/bedtools_genomecov/test.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +id="SRR6357070" +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --strandedness unstranded \ + --bam $meta_resources_dir/chr19.bam \ + --bedgraph_forward chr19_forward.bedgraph \ + --bedgraph_reverse chr19_reverse.bedgraph + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +# check whether output exists +[ ! -f "chr19_forward.bedgraph" ] && echo "File 'chr19_forward.bedgraph' does not exist!" && exit 1 +[ ! -s "chr19_forward.bedgraph" ] && echo "File 'chr19_forward.bedgraph' is empty!" && exit 1 +[ ! -f "chr19_reverse.bedgraph" ] && echo "File 'chr19_reverse.bedgraph' does not exist!" && exit 1 +[ ! -s "chr19_reverse.bedgraph" ] && echo "File 'chr19_reverse.bedgraph' is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/cat_additional_fasta/config.vsh.yaml b/src/cat_additional_fasta/config.vsh.yaml new file mode 100644 index 0000000..3235b23 --- /dev/null +++ b/src/cat_additional_fasta/config.vsh.yaml @@ -0,0 +1,54 @@ +name: "cat_additional_fasta" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/cat_additional_fasta.nf] + last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d +description: | + Concatenate addional fasta file to reference FASTA and GTF files. + +argument_groups: +- name: "Input" + arguments: + - name: "--fasta" + type: file + required: true + description: Path to FASTA genome file. + - name: "--gtf" + type: file + description: Path to GTF annotation file. + - name: "--additional_fasta" + type: file + description: FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences. + - name: "--biotype" + type: string + description: Biotype value to use when appending entries to GTF file when additional fasta file is provided. + +- name: "Output" + arguments: + - name: "--fasta_output" + type: file + direction: output + description: Concatenated FASTA file. + - name: "--gtf_output" + type: file + direction: output + description: Concatenated GTF file. + +resources: + - type: python_script + path: script.py + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/reference/genome.fasta + - path: /testData/minimal_test/reference/genes.gtf.gz + - path: /testData/minimal_test/reference/gfp.fa.gz + +engines: + - type: docker + image: python +runners: + - type: executable + - type: nextflow diff --git a/src/cat_additional_fasta/script.py b/src/cat_additional_fasta/script.py new file mode 100644 index 0000000..d06405b --- /dev/null +++ b/src/cat_additional_fasta/script.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +""" +Read a custom fasta file and create a custom GTF containing each entry +""" +from itertools import groupby +import logging +import os +import sys + +## VIASH START +par = { + "fasta": "testData/minimal_test/reference/genome.fasta", + "gtf": "testData/minimal_test/reference/genes.gtf", + "additional_fasta": "testData/minimal_test/reference/gfp.fa.gz", + "biotype": "gene_biotype", + "fasta_output": "genome_gfp.fasta", + "gtf_output": "genome_gfp.gtf", +} +meta = { + "functionality_name": "cat_additonal_fasta" +} +## VIASH END + +def fasta_iter(fasta_name): + """ + modified from Brent Pedersen + Correct Way To Parse A Fasta File In Python + given a fasta file. yield tuples of header, sequence + + Fasta iterator from https://www.biostars.org/p/710/#120760 + """ + with open(fasta_name) as fh: + # ditch the boolean (x[0]) and just keep the header or sequence since + # we know they alternate. + faiter = (x[1] for x in groupby(fh, lambda line: line[0] == ">")) + for header in faiter: + # drop the ">" + headerStr = header.__next__()[1:].strip() + # join all sequence lines to one. + seq = "".join(s.strip() for s in faiter.__next__()) + yield (headerStr, seq) + +def fasta2gtf(fasta, output, biotype): + fiter = fasta_iter(fasta) + # GTF output lines + lines = [] + attributes = 'exon_id "{name}.1"; exon_number "1";{biotype} gene_id "{name}_gene"; gene_name "{name}_gene"; gene_source "custom"; transcript_id "{name}_gene"; transcript_name "{name}_gene";\n' + line_template = "{name}\ttransgene\texon\t1\t{length}\t.\t+\t.\t" + attributes + for ff in fiter: + name, seq = ff + # Use first ID as separated by spaces as the "sequence name" + # (equivalent to "chromosome" in other cases) + seqname = name.split()[0] + # Remove all spaces + name = seqname.replace(" ", "_") + length = len(seq) + biotype_attr = "" + if biotype: + biotype_attr = f' {biotype} "transgene";' + line = line_template.format(name=name, length=length, biotype=biotype_attr) + lines.append(line) + with open(output, "w") as f: + f.write("".join(lines)) + +add_name = os.path.basename(par['additional_fasta']) +output = os.path.splitext(add_name)[0] + ".gtf" +fasta2gtf(par['additional_fasta'], output, par['biotype']) + +with open(par['fasta'], 'r') as f1: + content1 = f1.read() +with open(par['additional_fasta'], 'r') as f2: + content2 = f2.read() +with open(par['fasta_output'], 'w') as f_out: + f_out.write(content1 + content2) +with open(par['gtf'], 'r') as g1: + g_content1 = g1.read() +with open(output, 'r') as g2: + g_content2 = g2.read() +with open(par['gtf_output'], 'w') as g_out: + g_out.write(g_content1 + g_content2) diff --git a/src/cat_additional_fasta/test.sh b/src/cat_additional_fasta/test.sh new file mode 100644 index 0000000..c1c39f0 --- /dev/null +++ b/src/cat_additional_fasta/test.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +gunzip "$meta_resources_dir/genes.gtf" +gunzip "$meta_resources_dir/gfp.fa" + +"$meta_executable" \ + --fasta "$meta_resources_dir/genome.fasta" \ + --gtf "$meta_resources_dir/genes.gtf" \ + --additional_fasta "$meta_resources_dir/gfp.fa" \ + --biotype gene_biotype \ + --fasta_output genome_gfp.fasta \ + --gtf_output genome_gfp.gtf + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">>> Checking whether output exists" +[ ! -f "genome_gfp.fasta" ] && echo "File 'genome_gfp.fasta' does not exist!" && exit 1 +[ ! -s "genome_gfp.fasta" ] && echo "File 'genome_gfp.fasta' is empty!" && exit 1 +[ ! -f "genome_gfp.gtf" ] && echo "File 'genome_gfp.gtf' does not exist!" && exit 1 +[ ! -s "genome_gfp.gtf" ] && echo "File 'genome_gfp.gtf' is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 diff --git a/src/cat_fastq/config.vsh.yaml b/src/cat_fastq/config.vsh.yaml new file mode 100644 index 0000000..d76ade7 --- /dev/null +++ b/src/cat_fastq/config.vsh.yaml @@ -0,0 +1,54 @@ +name: "cat_fastq" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/cat/fastq/main.nf, modules/nf-core/cat/fastq/meta.yml] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: Concatenate multiple fastq files + +argument_groups: +- name: "Input" + arguments: + - name: "--read_1" + type: file + multiple: true + multiple_sep: ";" + description: Read 1 fastq files to be concatenated + - name: "--read_2" + type: file + multiple: true + multiple_sep: ";" + description: Read 2 fastq files to be concatenated + +- name: "Output" + arguments: + - name: "--fastq_1" + type: file + direction: output + default: $id_r1.fastq + description: Concatenated read 1 fastq + - name: "--fastq_2" + type: file + direction: output + must_exist: false + default: $id_r2.fastq + description: Concatenated read 2 fastq + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz + - path: /testData/minimal_test/input_fastq/SRR6357071_1.fastq.gz + - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz + - path: /testData/minimal_test/input_fastq/SRR6357071_2.fastq.gz + +engines: + - type: docker + image: ubuntu:22.04 +runners: + - type: executable + - type: nextflow diff --git a/src/cat_fastq/script.sh b/src/cat_fastq/script.sh new file mode 100644 index 0000000..4d32d60 --- /dev/null +++ b/src/cat_fastq/script.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -eo pipefail + +IFS=";" read -ra read_1 <<< $par_read_1 +IFS=";" read -ra read_2 <<< $par_read_2 + +filename=$(basename -- "${read_1[0]}") +if [ ${filename##*.} == "gz" ]; then + command="zcat" +else + command="cat" +fi + +if [ ${#read_1[@]} -gt 0 ]; then + $command ${read_1[*]} > $par_fastq_1 +fi +if [ ${#read_2[@]} -gt 0 ]; then + $command ${read_2[*]} > $par_fastq_2 +fi diff --git a/src/cat_fastq/test.sh b/src/cat_fastq/test.sh new file mode 100644 index 0000000..8eb0d41 --- /dev/null +++ b/src/cat_fastq/test.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +echo ">>> Testing paired-end read samples with multiple replicates" +"$meta_executable" \ + --read_1 $meta_resources_dir/SRR6357070_1.fastq.gz\;$meta_resources_dir/SRR6357071_1.fastq.gz \ + --read_2 $meta_resources_dir/SRR6357070_2.fastq.gz\;$meta_resources_dir/SRR6357071_2.fastq.gz \ + --fastq_1 read_1.merged.fastq \ + --fastq_2 read_2.merged.fastq + +echo ">>> Checking whether output exists" +[ ! -f "read_1.merged.fastq" ] && echo "Merged read 1 file does not exist!" && exit 1 +[ ! -s "read_1.merged.fastq" ] && echo "Merged read 1 file is empty!" && exit 1 +[ ! -f "read_2.merged.fastq" ] && echo "Merged read 2 file does not exist!" && exit 1 +[ ! -s "read_2.merged.fastq" ] && echo "Merged read 2 file is empty!" && exit 1 + +echo ">>> Check number of reads" +rep1_1=$(zcat $meta_resources_dir/SRR6357070_1.fastq.gz | echo $((`wc -l`/4))) +rep1_2=$(zcat $meta_resources_dir/SRR6357070_2.fastq.gz | echo $((`wc -l`/4))) +rep2_1=$(zcat $meta_resources_dir/SRR6357071_1.fastq.gz | echo $((`wc -l`/4))) +rep2_2=$(zcat $meta_resources_dir/SRR6357071_2.fastq.gz | echo $((`wc -l`/4))) +merged_1=$(cat read_1.merged.fastq | echo $((`wc -l`/4))) +merged_2=$(cat read_2.merged.fastq | echo $((`wc -l`/4))) +[[ $(( $rep1_1 + $rep2_1 )) != $merged_1 ]] || [[ $(( $rep1_2 + $rep2_2 )) != $merged_2 ]] && echo "Concatenation unsuccessful!" && exit 1 + +rm read_1.merged.fastq read_2.merged.fastq + +echo ">>> Testing single-end read samples with multiple replicates" +"$meta_executable" \ + --read_1 $meta_resources_dir/SRR6357070_1.fastq.gz\;$meta_resources_dir/SRR6357071_1.fastq.gz \ + --fastq_1 read_1.merged.fastq + +echo ">>> Checking whether output exists" +[ ! -f "read_1.merged.fastq" ] && echo "Merged read 1 file does not exist!" && exit 1 +[ ! -s "read_1.merged.fastq" ] && echo "Merged read 1 file is empty!" && exit 1 + +echo ">>> Check number of reads" +rep1_1=$(zcat $meta_resources_dir/SRR6357070_1.fastq.gz | echo $((`wc -l`/4))) +rep2_1=$(zcat $meta_resources_dir/SRR6357071_1.fastq.gz | echo $((`wc -l`/4))) +merged_1=$(cat read_1.merged.fastq | echo $((`wc -l`/4))) +[ $(( $rep1_1 + $rep2_1 )) != $merged_1 ] && echo "Concatenation unsuccessful!" && exit 1 +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/deseq2_qc/config.vsh.yaml b/src/deseq2_qc/config.vsh.yaml new file mode 100644 index 0000000..8ee1be8 --- /dev/null +++ b/src/deseq2_qc/config.vsh.yaml @@ -0,0 +1,86 @@ +name: deseq2_qc +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/deseq2_qc.nf] + last_sha: 92b2a7857de1dda9d1c19a088941fc81e2976ff7 +description: | + run deseq2, perform pca, generate heatmaps and scatterplots for samples in the counts files + +argument_groups: +- name: "input" + arguments: + - name: "--counts" + type: file + description: Count file matrix where rows are genes and columns are samples. + required: true + - name: "--vst" + type: boolean + default: false + description: Use vst transformation instead of rlog with .DESeq2 + - name: "--count_col" + type: integer + default: 3 + description: First column containing sample count data. + - name: "--id_col" + type: integer + default: 1 + description: Column containing identifiers to be used. + - name: "--sample_suffix" + type: string + description: Suffix to remove after sample name in columns e.g. '.rmDup.bam' if 'DRUG_R1.rmDup.bam'. + default: "" + - name: "--outprefix" + type: string + default: deseq2 + description: Output prefix + - name: "--label" + type: string + description: Label to used in MultiQC report + +- name: "Output" + arguments: + - name: "--outdir" + type: file + direction: output + default: deseq2 + - name: "--pca_multiqc" + type: file + direction: output + default: deseq2.pca.vals_mqc.tsv + - name: "--sample_dists_multiqc" + type: file + direction: output + default: deseq2.sample.dists_mqc.tsv + +resources: + # adapted from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/deseq2_qc.r + - type: r_script + path: script.r + # Add proper default headers as part of the component + - path: deseq2_pca_header.txt + - path: deseq2_clustering_header.txt + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/counts.tsv + +engines: + - type: docker + image: debian:latest + setup: + - type: apt + packages: + - libcurl4-openssl-dev + - r-base + - r-base-core + - libxml2-dev + - procps + - libssl-dev + - type: r + cran: [ optparse, ggplot2, RColorBrewer, pheatmap, stringr, matrixStats ] + bioc: [ DESeq2 ] +runners: + - type: executable + - type: nextflow diff --git a/src/deseq2_qc/deseq2_clustering_header.txt b/src/deseq2_qc/deseq2_clustering_header.txt new file mode 100644 index 0000000..04e10ef --- /dev/null +++ b/src/deseq2_qc/deseq2_clustering_header.txt @@ -0,0 +1,12 @@ +#id: 'deseq2_clustering' +#section_name: 'DESeq2 sample similarity' +#description: "is generated from clustering by Euclidean distances between +# DESeq2 +# rlog values for each sample +# in the deseq2_qc.r script." +#plot_type: 'heatmap' +#anchor: 'deseq2_clustering' +#pconfig: +# title: 'DESeq2: Heatmap of the sample-to-sample distances' +# xlab: True +# reverseColors: True diff --git a/src/deseq2_qc/deseq2_pca_header.txt b/src/deseq2_qc/deseq2_pca_header.txt new file mode 100644 index 0000000..2599732 --- /dev/null +++ b/src/deseq2_qc/deseq2_pca_header.txt @@ -0,0 +1,11 @@ +#id: 'deseq2_pca' +#section_name: 'DESeq2 PCA plot' +#description: "PCA plot between samples in the experiment. +# These values are calculated using DESeq2 +# in the deseq2_qc.r script." +#plot_type: 'scatter' +#anchor: 'deseq2_pca' +#pconfig: +# title: 'DESeq2: Principal component plot' +# xlab: PC1 +# ylab: PC2 diff --git a/src/deseq2_qc/script.r b/src/deseq2_qc/script.r new file mode 100755 index 0000000..a78c575 --- /dev/null +++ b/src/deseq2_qc/script.r @@ -0,0 +1,233 @@ +## VIASH START +par <- list( + counts = "testData/unit_test_resources/counts.tsv", + id_col = 1, + sample_suffix = "", + outprefix = "deseq2", + count_col = 2, + deseq2_output = "deseq2", + pca_multiqc = "pca.vals_mqc.tsv", + dists_multiqc = "sample.dists_mqc.tsv", + vst = FALSE, + outdir = '.' +) +meta <- list( + resources_dir = "src/deseq2_qc" +) +## VIASH END + +# REQUIREMENTS + +## PCA, HEATMAP AND SCATTERPLOTS FOR SAMPLES IN COUNTS FILE +## - SAMPLE NAMES HAVE TO END IN e.g. "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS. + +# LOAD LIBRARIES +library(DESeq2) +library(ggplot2) +library(RColorBrewer) +library(pheatmap) +library(stringr) + +if (file.exists(par$outdir) == FALSE) { + dir.create(par$outdir, recursive = TRUE) +} + +# READ IN COUNTS FILE +count_table <- read.delim(file = par$counts, header = TRUE, row.names = NULL) +rownames(count_table) <- count_table[, par$id_col] +count_table <- count_table[, par$count_col:ncol(count_table), drop = FALSE] +colnames(count_table) <- gsub(par$sample_suffix, "", colnames(count_table)) +colnames(count_table) <- gsub(pattern = '\\.$', replacement = '', colnames(count_table)) + +# RUN DESEQ2 +samples_vec <- colnames(count_table) +name_components <- strsplit(samples_vec, "_") +n_components <- length(name_components[[1]]) +decompose <- n_components != 1 && all(sapply(name_components, length) == n_components) +coldata <- data.frame(samples_vec, sample = samples_vec, row.names = 1) +if (decompose) { + groupings <- as.data.frame(lapply(1:n_components, function(i) sapply(name_components, "[[", i))) + n_distinct <- sapply(groupings, function(grp) length(unique(grp))) + groupings <- groupings[n_distinct != 1 & n_distinct != length(samples_vec)] + if (ncol(groupings) != 0) { + names(groupings) <- paste0("Group", 1:ncol(groupings)) + coldata <- cbind(coldata, groupings) + } else { + decompose <- FALSE + } +} + +DDSFile <- paste(par$outdir, "/", par$outprefix, ".dds.RData", sep = "") + +counts <- count_table[, samples_vec, drop = FALSE] +dds <- DESeqDataSetFromMatrix(countData = round(counts), colData = coldata, design = ~1) +dds <- estimateSizeFactors(dds) + +# No point if only one sample, or one gene +if (min(dim(count_table)) <= 1) { + save(dds, file = DDSFile) + saveRDS(dds, file = sub("\\.dds\\.RData$", ".rds", DDSFile)) + warning("Not enough samples or genes in counts file for PCA.", call. = FALSE) + quit(save = "no", status = 0, runLast = FALSE) +} + +if (!par$vst) { + vst_name <- "rlog" + rld <- rlog(dds) +} else { + vst_name <- "vst" + rld <- varianceStabilizingTransformation(dds) +} + +assay(dds, vst_name) <- assay(rld) +save(dds, file = DDSFile) +saveRDS(dds, file = sub("\\.dds\\.RData$", ".rds", DDSFile)) + +# PLOT QC + +##' PCA pre-processeor +##' +##' Generate all the necessary information to plot PCA from a DESeq2 object +##' in which an assay containing a variance-stabilised matrix of counts is +##' stored. Copied from DESeq2::plotPCA, but with additional ability to +##' say which assay to run the PCA on. +##' +##' @param object The DESeq2DataSet object. +##' @param ntop number of top genes to use for principla components, selected by highest row variance. +##' @param assay the name or index of the assay that stores the variance-stabilised data. +##' @return A data.frame containing the projected data alongside the grouping columns. +##' A 'percentVar' attribute is set which includes the percentage of variation each PC explains, +##' and additionally how much the variation within that PC is explained by the grouping variable. +##' @author Gavin Kelly + +plotPCA_vst <- function(object, ntop = 500, assay = length(assays(object))) { + rv <- rowVars(assay(object, assay), useNames = TRUE) + select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] + pca <- prcomp(t(assay(object, assay)[select, ]), center = TRUE, scale = FALSE) + percentVar <- pca$sdev^2 / sum(pca$sdev^2) + df <- cbind(as.data.frame(colData(object)), pca$x) + # Order points so extreme samples are more likely to get label + ord <- order(abs(rank(df$PC1) - median(df$PC1)), abs(rank(df$PC2) - median(df$PC2))) + df <- df[ord, ] + attr(df, "percentVar") <- data.frame(PC = seq(along = percentVar), percentVar = 100 * percentVar) + return(df) +} + +PlotFile <- paste(par$outdir, "/", par$outprefix, ".plots.pdf", sep = "") + +pdf(file = PlotFile, onefile = TRUE, width = 7, height = 7) + +## PCA +ntop <- c(500, Inf) +for (n_top_var in ntop) { + pca_data <- plotPCA_vst(dds, assay = vst_name, ntop = n_top_var) + percentVar <- round(attr(pca_data, "percentVar")$percentVar) + plot_subtitle <- ifelse(n_top_var == Inf, "All genes", paste("Top", n_top_var, "genes")) + pl <- ggplot(pca_data, aes(PC1, PC2, label = paste0(" ", sample, " "))) + + geom_point() + + geom_text(check_overlap = TRUE, vjust = 0.5, hjust="inward") + + xlab(paste0("PC1: ", percentVar[1], "% variance")) + + ylab(paste0("PC2: ", percentVar[2], "% variance")) + + labs(title = paste0("First PCs on ", vst_name, "-transformed data"), subtitle = plot_subtitle) + + theme(legend.position = "top", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + panel.border = element_rect(colour = "black", fill = NA, size = 1)) + print(pl) + + if (decompose) { + pc_names <- paste0("PC", attr(pca_data, "percentVar")$PC) + long_pc <- reshape(pca_data, varying=pc_names, direction="long", sep="", timevar="component", idvar="pcrow") + long_pc <- subset(long_pc, component <= 5) + long_pc_grp <- reshape(long_pc, varying = names(groupings), direction = "long", sep = "", timevar = "grouper") + long_pc_grp <- subset(long_pc_grp, grouper <= 5) + long_pc_grp$component <- paste("PC", long_pc_grp$component) + long_pc_grp$grouper <- paste0(long_pc_grp$grouper, c("st", "nd", "rd", "th", "th")[long_pc_grp$grouper], " prefix") + pl <- ggplot(long_pc_grp, aes(x = Group, y = PC)) + + geom_point() + + stat_summary(fun = mean, geom = "line", aes(group = 1)) + + labs(x = NULL, y = NULL, subtitle = plot_subtitle, title = "PCs split by sample-name prefixes") + + facet_grid(component ~ grouper, scales = "free_x") + + scale_x_discrete(guide = guide_axis(n.dodge = 3)) + print(pl) + } +} # at end of loop, we'll be using the user-defined ntop if any, else all genes + +## WRITE PC1 vs PC2 VALUES TO FILE +pca_vals <- pca_data[, c("PC1", "PC2")] +colnames(pca_vals) <- paste0(colnames(pca_vals), ": ", percentVar[1:2], '% variance') +pca_vals <- cbind(sample = rownames(pca_vals), pca_vals) +pca_vals_file <- paste(par$outdir, "/", par$outprefix, ".pca_vals.txt", sep = "") +write.table(pca_vals, file = pca_vals_file, + row.names = FALSE, col.names = TRUE, + sep = "\t", quote = TRUE) + +## SAMPLE CORRELATION HEATMAP +sampleDists <- dist(t(assay(dds, vst_name))) +sampleDistMatrix <- as.matrix(sampleDists) +colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255) +pheatmap( + sampleDistMatrix, + clustering_distance_rows = sampleDists, + clustering_distance_cols = sampleDists, + col = colors, + main = paste("Euclidean distance between", vst_name, "of samples") +) + +## WRITE SAMPLE DISTANCES TO FILE +sample_dist_file <- paste(par$outdir, "/", par$outprefix, ".sample.dists.txt", sep = "") +write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix), + file = sample_dist_file, row.names = FALSE, + col.names = TRUE, sep = "\t", quote = FALSE) +dev.off() + +# SAVE SIZE FACTORS +SizeFactorsDir <- paste0(par$outdir, "/size_factors/") +if (file.exists(SizeFactorsDir) == FALSE) { + dir.create(SizeFactorsDir, recursive = TRUE) +} + +NormFactorsFile <- paste(SizeFactorsDir, par$outprefix, ".size_factors.RData", sep = "") + +normFactors <- sizeFactors(dds) +save(normFactors, file = NormFactorsFile) + +for (name in names(sizeFactors(dds))) { + sizeFactorFile <- paste(SizeFactorsDir, name, ".txt", sep = "") + write(as.numeric(sizeFactors(dds)[name]), file = sizeFactorFile) +} + +# R SESSION INFO +RLogFile <- "R_sessionInfo.log" + +sink(RLogFile) +a <- sessionInfo() +print(a) +sink() + +# Prepare files for MultiQC + +readLines(paste0(meta$resources_dir, "/deseq2_pca_header.txt")) |> + stringr::str_replace(pattern = "#id: 'deseq2_pca'", + replace = paste0("#id: '", par$label, "_deseq2_pca'")) |> + writeLines(con = "tmp.txt") + +readLines(paste0("tmp.txt")) |> + stringr::str_replace(pattern = "#section_name: 'DESeq2 PCA plot'", + replace = paste0("#section_name: 'DESeq2 PCA plot - '", par$label)) |> + writeLines(con = "tmp.txt") + +system2("cat", args = paste0("tmp.txt ", pca_vals_file), stdout = par$pca_multiqc) + +readLines(paste0(meta$resources_dir, "/deseq2_clustering_header.txt")) |> + stringr::str_replace(pattern = "#id: 'deseq2_clustering'", + replace = paste0("#id: '", par$label, "_deseq2_clustering'")) |> + writeLines(con = "tmp.txt") + +readLines(paste0("tmp.txt")) |> + stringr::str_replace(pattern = "#section_name: 'DESeq2 sample similarity'", + replace = paste0("#section_name: 'DESeq2 sample similarity - '", par$label)) |> + writeLines(con = "tmp.txt") + +system2("cat", args = paste0("tmp.txt ", sample_dist_file), stdout = par$sample_dists_multiqc) diff --git a/src/deseq2_qc/test.sh b/src/deseq2_qc/test.sh new file mode 100644 index 0000000..867b9d9 --- /dev/null +++ b/src/deseq2_qc/test.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Run executable +echo "> Running $meta_functionality_name" + +"$meta_executable" \ + --counts $meta_resources_dir/counts.tsv \ + --id_col 1 \ + --sample_suffix '' \ + --outprefix deseq2 \ + --count_col 2 \ + --deseq2_output "deseq2/" \ + --pca_multiqc pca.vals_mqc.tsv \ + --sample_dists_multiqc sample.dists_mqc.tsv \ + --outdir deseq2 + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Check whether output exists" + +[ ! -d "deseq2" ] && echo "deseq2 was not created" && exit 1 +[ -z "$(ls -A 'deseq2')" ] && echo "deseq2 is empty" && exit 1 +[ ! -f "pca.vals_mqc.tsv" ] && echo "pca.vals_mqc.tsv was not created" && exit 1 +[ ! -s "pca.vals_mqc.tsv" ] && echo "pca.vals_mqc.tsv is empty" && exit 1 +[ ! -f "sample.dists_mqc.tsv" ] && echo "sample.dists_mqc.tsv was not created" && exit 1 +[ ! -s "sample.dists_mqc.tsv" ] && echo "sample.dists_mqc.tsv is empty" && exit 1 + +exit 0 diff --git a/src/dupradar/config.vsh.yaml b/src/dupradar/config.vsh.yaml new file mode 100644 index 0000000..3bc7bb1 --- /dev/null +++ b/src/dupradar/config.vsh.yaml @@ -0,0 +1,118 @@ +name: "dupradar" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/dupradar.nf] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: | + Assessment of duplication rates in RNA-Seq datasets + +argument_groups: +- name: "Input" + arguments: + - name: "--id" + type: string + description: Sample ID + + - name: "--input" + type: file + required: true + description: path to input alignment file in BAM format + + - name: "--gtf_annotation" + type: file + required: true + description: path to GTF annotation file. + + - name: "--paired" + type: boolean + description: add flag if input alignment file consists of paired reads + + - name: "--strandedness" + type: string + required: false + choices: ["forward", "reverse", "unstranded"] + description: strandedness of input bam file reads (forward, reverse or unstranded (default, applicable to paired reads)) + + +- name: "Output" + arguments: + - name: "--output_dupmatrix" + type: file + direction: output + required: false + must_exist: true + default: $id.dup_matrix.txt + description: path to output file (txt) of duplicate tag counts + + - name: "--output_dup_intercept_mqc" + type: file + direction: output + required: false + must_exist: true + default: $id.dup_intercept_mqc.txt + description: path to output file (txt) of multiqc intercept value DupRadar + + - name: "--output_duprate_exp_boxplot" + type: file + direction: output + required: false + must_exist: true + default: $id.duprate_exp_boxplot.pdf + description: path to output file (pdf) of distribution of expression box plot + + - name: "--output_duprate_exp_densplot" + type: file + direction: output + required: false + must_exist: true + default: $id.duprate_exp_densityplot.pdf + description: path to output file (pdf) of 2D density scatter plot of duplicate tag counts + + - name: "--output_duprate_exp_denscurve_mqc" + type: file + direction: output + required: false + must_exist: true + default: $id.duprate_exp_density_curve_mqc.txt + description: path to output file (pdf) of density curve of gene duplication multiqc + + - name: "--output_expression_histogram" + type: file + direction: output + required: false + must_exist: true + default: $id.expression_hist.pdf + description: path to output file (pdf) of distribution of RPK values per gene histogram + + - name: "--output_intercept_slope" + type: file + direction: output + required: false + must_exist: true + default: $id.intercept_slope.txt + description: output file (txt) with progression of duplication rate value + +resources: + - type: bash_script + path: script.sh + # Copied from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/dupradar.r + - path: dupradar.r + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam + - path: /testData/unit_test_resources/genes.gtf + +engines: + - type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [ r-base ] + - type: r + bioc: [ dupRadar ] +runners: + - type: executable + - type: nextflow diff --git a/src/dupradar/dupradar.r b/src/dupradar/dupradar.r new file mode 100644 index 0000000..82218c6 --- /dev/null +++ b/src/dupradar/dupradar.r @@ -0,0 +1,154 @@ +#!/usr/bin/env Rscript + +# Command line argument processing +args = commandArgs(trailingOnly=TRUE) +if (length(args) < 5) { + stop("Usage: dupRadar.r ", call.=FALSE) +} + +message("paired_end is", args[5]) +message("the type is is", class(args[5])) + +input_bam <- args[1] +output_prefix <- args[2] +annotation_gtf <- args[3] +stranded <- as.numeric(args[4]) +paired_end <- ifelse(args[5] == "true", TRUE, FALSE) +threads <- as.numeric(args[6]) + +bamRegex <- "(.+)\\.bam$" + +if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '' must be an existing file (not a directory) with '.bam' extension...") +if(!(file.exists(annotation_gtf) && (!file.info(annotation_gtf)$isdir))) stop("Third argument '' must be an existing file (and not a directory)...") +if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...") +if(is.na(threads) || (threads<=0)) stop("Fifth argument must be a strictly positive numeric value...") + +# Debug messages (stderr) +message("Input bam (Arg 1): ", input_bam) +message("Output basename(Arg 2): ", output_prefix) +message("Input gtf (Arg 3): ", annotation_gtf) +message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded+1]) +message("paired_end (Arg 5): ", paired_end) +message("Nb threads (Arg 6): ", threads) +message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not specified")) + + +# Load / install packages +if (length(args) > 5) { .libPaths( c( args[6], .libPaths() ) ) } +if (!require("dupRadar")){ + source("http://bioconductor.org/biocLite.R") + biocLite("dupRadar", suppressUpdates=TRUE) + library("dupRadar") +} +if (!require("parallel")) { + install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/') + library("parallel") +} + +# Duplicate stats +dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads) +write.table(dm, file=paste(output_prefix, "_dupMatrix.txt", sep=""), quote=F, row.name=F, sep="\t") + +# 2D density scatter plot +pdf(paste0(output_prefix, "_duprateExpDens.pdf")) +duprateExpDensPlot(DupMat=dm) +title("Density scatter plot") +mtext(output_prefix, side=3) +dev.off() +fit <- duprateExpFit(DupMat=dm) +cat( + paste("- dupRadar Int (duprate at low read counts):", fit$intercept), + paste("- dupRadar Sl (progression of the duplication rate):", fit$slope), + fill=TRUE, labels=output_prefix, + file=paste0(output_prefix, "_intercept_slope.txt"), append=FALSE +) + +# Create a multiqc file dupInt +sample_name <- gsub("Aligned.sortedByCoord.out.markDups", "", output_prefix) +line="#id: DupInt +#plot_type: 'generalstats' +#pconfig: +# dupRadar_intercept: +# title: 'dupInt' +# namespace: 'DupRadar' +# description: 'Intercept value from DupRadar' +# max: 100 +# min: 0 +# scale: 'RdYlGn-rev' +# format: '{:.2f}%' +Sample dupRadar_intercept" + +write(line,file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) +write(paste(sample_name, fit$intercept),file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) + +# Get numbers from dupRadar GLM +curve_x <- sort(log10(dm$RPK)) +curve_y = 100*predict(fit$glm, data.frame(x=curve_x), type="response") +# Remove all of the infinite values +infs = which(curve_x %in% c(-Inf,Inf)) +curve_x = curve_x[-infs] +curve_y = curve_y[-infs] +# Reduce number of data points +curve_x <- curve_x[seq(1, length(curve_x), 10)] +curve_y <- curve_y[seq(1, length(curve_y), 10)] +# Convert x values back to real counts +curve_x = 10^curve_x +# Write to file +line="#id: dupradar +#section_name: 'DupRadar' +#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html' +#description: \"provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication. +# This plot shows the general linear models - a summary of the gene duplication distributions. \" +#pconfig: +# title: 'DupRadar General Linear Model' +# xLog: True +# xlab: 'expression (reads/kbp)' +# ylab: '% duplicate reads' +# ymax: 100 +# ymin: 0 +# tt_label: '{point.x:.1f} reads/kbp: {point.y:,.2f}% duplicates' +# xPlotLines: +# - color: 'green' +# dashStyle: 'LongDash' +# label: +# style: {color: 'green'} +# text: '0.5 RPKM' +# verticalAlign: 'bottom' +# y: -65 +# value: 0.5 +# width: 1 +# - color: 'red' +# dashStyle: 'LongDash' +# label: +# style: {color: 'red'} +# text: '1 read/bp' +# verticalAlign: 'bottom' +# y: -65 +# value: 1000 +# width: 1" + +write(line,file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),append=TRUE) +write.table( + cbind(curve_x, curve_y), + file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"), + quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE, +) + +# Distribution of expression box plot +pdf(paste0(output_prefix, "_duprateExpBoxplot.pdf")) +duprateExpBoxplot(DupMat=dm) +title("Percent Duplication by Expression") +mtext(output_prefix, side=3) +dev.off() + +# Distribution of RPK values per gene +pdf(paste0(output_prefix, "_expressionHist.pdf")) +expressionHist(DupMat=dm) +title("Distribution of RPK values per gene") +mtext(output_prefix, side=3) +dev.off() + +# Print sessioninfo to standard out +print(output_prefix) +citation("dupRadar") +sessionInfo() diff --git a/src/dupradar/script.sh b/src/dupradar/script.sh new file mode 100644 index 0000000..6f7caee --- /dev/null +++ b/src/dupradar/script.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +set -exo pipefail + +function num_strandness { + if [ $par_strandedness == 'unstranded' ]; then echo 0 + elif [ $par_strandedness == 'forward' ]; then echo 1 + elif [ $par_strandedness == 'reverse' ]; then echo 2 + else echo "strandedness must be unstranded, forward or reverse." && \ + exit 1 + fi +} + +Rscript "$meta_resources_dir/dupradar.r" \ + $par_input \ + $par_id \ + $par_gtf_annotation \ + $(num_strandness) \ + $par_paired \ + ${meta_cpus:-1} + +mv "$par_id"_dupMatrix.txt $par_output_dupmatrix +mv "$par_id"_dup_intercept_mqc.txt $par_output_dup_intercept_mqc +mv "$par_id"_duprateExpBoxplot.pdf $par_output_duprate_exp_boxplot +mv "$par_id"_duprateExpDens.pdf $par_output_duprate_exp_densplot +mv "$par_id"_duprateExpDensCurve_mqc.txt $par_output_duprate_exp_denscurve_mqc +mv "$par_id"_expressionHist.pdf $par_output_expression_histogram +mv "$par_id"_intercept_slope.txt $par_output_intercept_slope diff --git a/src/dupradar/test.sh b/src/dupradar/test.sh new file mode 100644 index 0000000..aa9f60d --- /dev/null +++ b/src/dupradar/test.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# define input and output for script +input_bam="$meta_resources_dir/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" +input_gtf="$meta_resources_dir/genes.gtf" + +output_dupmatrix="dup_matrix.txt" +output_dup_intercept_mqc="dup_intercept_mqc.txt" +output_duprate_exp_boxplot="duprate_exp_boxplot.pdf" +output_duprate_exp_densplot="duprate_exp_densityplot.pdf" +output_duprate_exp_denscurve_mqc="duprate_exp_density_curve_mqc.pdf" +output_expression_histogram="expression_hist.pdf" +output_intercept_slope="intercept_slope.txt" + +# Run executable +echo "> Running $meta_functionality_name for unpaired reads, writing to tmpdir $tmpdir." + +"$meta_executable" \ + --input "$input_bam" \ + --id "test" \ + --gtf_annotation "$input_gtf" \ + --strandedness "forward" \ + --paired false \ + --output_dupmatrix $output_dupmatrix \ + --output_dup_intercept_mqc $output_dup_intercept_mqc \ + --output_duprate_exp_boxplot $output_duprate_exp_boxplot \ + --output_duprate_exp_densplot $output_duprate_exp_densplot \ + --output_duprate_exp_denscurve_mqc $output_duprate_exp_denscurve_mqc \ + --output_expression_histogram $output_expression_histogram \ + --output_intercept_slope $output_intercept_slope + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> asserting output has been created for paired read input" +[ ! -f "$output_dupmatrix" ] && echo "$output_dupmatrix was not created" && exit 1 +[ ! -s "$output_dupmatrix" ] && echo "$output_dupmatrix is empty" && exit 1 +[ ! -f "$output_dup_intercept_mqc" ] && echo "$output_dup_intercept_mqc was not created" && exit 1 +[ ! -s "$output_dup_intercept_mqc" ] && echo "$output_dup_intercept_mqc is empty" && exit 1 +[ ! -f "$output_duprate_exp_boxplot" ] && echo "$output_duprate_exp_boxplot was not created" && exit 1 +[ ! -s "$output_duprate_exp_boxplot" ] && echo "$output_duprate_exp_boxplot is empty" && exit 1 +[ ! -f "$output_duprate_exp_densplot" ] && echo "$output_duprate_exp_densplot was not created" && exit 1 +[ ! -s "$output_duprate_exp_densplot" ] && echo "$output_duprate_exp_densplot is empty" && exit 1 +[ ! -f "$output_duprate_exp_denscurve_mqc" ] && echo "$output_duprate_exp_denscurve_mqc was not created" && exit 1 +[ ! -s "$output_duprate_exp_denscurve_mqc" ] && echo "$output_duprate_exp_denscurve_mqc is empty" && exit 1 +[ ! -f "$output_expression_histogram" ] && echo "$output_expression_histogram was not created" && exit 1 +[ ! -s "$output_expression_histogram" ] && echo "$output_expression_histogram is empty" && exit 1 +[ ! -f "$output_intercept_slope" ] && echo "$output_intercept_slope was not created" && exit 1 +[ ! -s "$output_intercept_slope" ] && echo "$output_intercept_slope is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/extra/copy_if_exists/config.vsh.yaml b/src/extra/copy_if_exists/config.vsh.yaml new file mode 100644 index 0000000..ffd4da7 --- /dev/null +++ b/src/extra/copy_if_exists/config.vsh.yaml @@ -0,0 +1,32 @@ +name: "copy_if_exists" +argument_groups: + - name: "Input" + arguments: + - name: "--required_file" + type: file + must_exist: false + required: true + example: /tmp/rnaseq_workflow_config/required_file.txt + - name: --optional_file + type: file + must_exist: false + example: /tmp/rnaseq_workflow_config/optional_file.txt + + - name: "Ouput" + arguments: + - name: "--output" + type: file + direction: output + default: copy_if_exists_output +resources: + - type: bash_script + path: script.sh + - path: /src/assets/required_file.txt + - path: /src/assets/optional_file.txt + +engines: + - type: docker + image: ubuntu:22.04 +runners: + - type: executable + - type: nextflow diff --git a/src/extra/copy_if_exists/script.sh b/src/extra/copy_if_exists/script.sh new file mode 100644 index 0000000..c14ce5f --- /dev/null +++ b/src/extra/copy_if_exists/script.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -eo pipefail + +mkdir -p $par_output + +# This file is checked by the Nextflow module wrapper +cp $par_required_file "$par_output" + +# If the variable is empty, we use the default one (registered as a resource) +if [ -z $par_optional_file ]; then + echo "No optional_file provided, using the default" + cp $meta_resources_dir/optional_file.txt "$par_output" +else + echo "Optional file provided" + if [ -f $par_optional_file ]; then + cp $par_optional_file "$par_output" + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" diff --git a/src/getchromsizes/config.vsh.yaml b/src/getchromsizes/config.vsh.yaml new file mode 100644 index 0000000..687e43b --- /dev/null +++ b/src/getchromsizes/config.vsh.yaml @@ -0,0 +1,57 @@ +name: "getchromsizes" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/custom/getchromsizes/main.nf, modules/nf-core/custom/getchromsizes/meta.yml] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: | + Generates a FASTA file of chromosome sizes and a fasta index file. + +argument_groups: +- name: "Input" + arguments: + - name: "--fasta" + type: file + description: Genome fasta files + +- name: "Output" + arguments: + - name: "--sizes" + type: file + direction: output + description: File containing chromosome lengths + - name: "--fai" + type: file + description: FASTA index file + direction: output + - name: "--gzi" # optional + type: file + description: Optional gzip index file for compressed inputs + direction: output + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/reference/genome.fasta + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: docker + run: | + apt-get update && \ + apt-get install -y autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev curl bzip2 && \ + curl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 -o samtools-1.18.tar.bz2 && \ + tar -xjf samtools-1.18.tar.bz2 && \ + rm samtools-1.18.tar.bz2 && \ + cd samtools-1.18 && \ + ./configure && \ + make && \ + make install +runners: + - type: executable + - type: nextflow diff --git a/src/getchromsizes/script.sh b/src/getchromsizes/script.sh new file mode 100755 index 0000000..18fed86 --- /dev/null +++ b/src/getchromsizes/script.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eo pipefail + +filename="$(basename -- $par_fasta)" + +samtools faidx $par_fasta +cut -f 1,2 "$par_fasta.fai" > $par_sizes +mv "$par_fasta.fai" $par_fai diff --git a/src/getchromsizes/test.sh b/src/getchromsizes/test.sh new file mode 100644 index 0000000..291ddd8 --- /dev/null +++ b/src/getchromsizes/test.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +echo "Testing $meta_functionality_name" +"$meta_executable" \ + --fasta "$meta_resources_dir/genome.fasta" \ + --sizes genome.fasta.sizes \ + --fai genome.fasta.fai + +echo ">>> Checking whether output exists" +[ ! -f "genome.fasta.sizes" ] && echo "Chromosome lengths file does not exist!" && exit 1 +[ ! -s "genome.fasta.sizes" ] && echo "Chromosome lengths file is empty!" && exit 1 +[ ! -f "genome.fasta.fai" ] && echo "FASTA index file does not exist!" && exit 1 +[ ! -s "genome.fasta.fai" ] && echo "FASTA index file does is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/gtf2bed/config.vsh.yaml b/src/gtf2bed/config.vsh.yaml new file mode 100644 index 0000000..e1ec89d --- /dev/null +++ b/src/gtf2bed/config.vsh.yaml @@ -0,0 +1,45 @@ +name: "gtf2bed" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/gtf2bed.nf] + last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d +description: | + Create BED annotation file from GTF. + +argument_groups: +- name: "Input" + arguments: + - name: "--gtf" + type: file + required: true + description: A reference file in GTF format. + +- name: " Output" + arguments: + - name: "--bed_output" + type: file + direction: output + required: true + description: BED file resulting from the conversion of the GTF input file. + +resources: + - type: bash_script + path: script.sh + # Copied from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/gtf2bed + - path: gtf2bed.pl + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/reference/genes.gtf.gz + +engines: + - type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [perl] +runners: + - type: executable + - type: nextflow diff --git a/src/gtf2bed/gtf2bed.pl b/src/gtf2bed/gtf2bed.pl new file mode 100755 index 0000000..4da5c04 --- /dev/null +++ b/src/gtf2bed/gtf2bed.pl @@ -0,0 +1,122 @@ +#!/usr/bin/env perl + +# Copyright (c) 2011 Erik Aronesty (erik@q32.com) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT. + +use Getopt::Long; + +my $extended; +GetOptions("x"=>\$extended); + +$in = shift @ARGV; + +my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? "unzip -p $in|" : "$in") || die "Can't open $in: $!\n"; +open IN, $in_cmd; + +while () { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { + # most ver 2's stick gene names in the id field + ($id) = $f[8]=~ /\bID="([^"]+)"/; + # most ver 3's stick unquoted names in the name field + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } + + next unless $id && $f[0]; + + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! $f[3]; + # gff3 puts :\d in exons sometimes + $id =~ s/:\d+$// if $gff == 3; + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } +} + +for $id ( + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? + $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + my ($cds, $cde); + ($cds, $cde) = @{$sc{$id}} if $sc{$id}; + + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + + my $gene_id; + my $extend = ""; + if ($extended) { + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + $extend="\t$gene_id"; + } + # added an extra comma to make it look exactly like ucsc's beds + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; +} + +close IN; diff --git a/src/gtf2bed/script.sh b/src/gtf2bed/script.sh new file mode 100755 index 0000000..2d73c19 --- /dev/null +++ b/src/gtf2bed/script.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -eo pipefail + +perl "$meta_resources_dir/gtf2bed.pl" $par_gtf > $par_bed_output diff --git a/src/gtf2bed/test.sh b/src/gtf2bed/test.sh new file mode 100644 index 0000000..91aeec5 --- /dev/null +++ b/src/gtf2bed/test.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +gunzip "$meta_resources_dir/genes.gtf.gz" + +echo ">>> Testing $meta_functionality_name" +"$meta_executable" \ + --gtf "$meta_resources_dir/genes.gtf" \ + --bed_output genes.bed + +echo ">>> Check whether output exists" +[ ! -f "genes.bed" ] && echo "BED output file does not exist!" && exit 1 +[ ! -s "genes.bed" ] && echo "BED output file is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/gtf_filter/config.vsh.yaml b/src/gtf_filter/config.vsh.yaml new file mode 100644 index 0000000..da4b3af --- /dev/null +++ b/src/gtf_filter/config.vsh.yaml @@ -0,0 +1,45 @@ +name: "gtf_filter" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/gtf_filter.nf] + last_sha: 1c6012ecbb087014ea4b8f0f3d39b874850277a8 +description: | + Filters a GTF file based on sequence names in a FASTA file. + +argument_groups: +- name: "Input" + arguments: + - name: "--fasta" + type: file + description: Genome fasta file + - name: "--gtf" + type: file + description: GTF file + - name: "--skip_transcript_id_check" + type: boolean_true + description: Skip checking for transcript IDs in the GTF file. + +- name: " Output" + arguments: + - name: "--filtered_gtf" + type: file + direction: output + description: Filtered GTF file containing only sequences in the FASTA file + +resources: + - type: python_script + path: script.py + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/reference/genome.fasta + - path: /testData/minimal_test/reference/genes.gtf.gz + +engines: + - type: docker + image: python +runners: + - type: executable + - type: nextflow diff --git a/src/gtf_filter/script.py b/src/gtf_filter/script.py new file mode 100644 index 0000000..f98e73d --- /dev/null +++ b/src/gtf_filter/script.py @@ -0,0 +1,47 @@ +# Adapted from https://github.com/nf-core/rnaseq/blob/3.14.0/bin/filter_gtf.py + +import os +import sys +import re +import statistics +from typing import Set + +def extract_fasta_seq_names(fasta_name: str) -> Set[str]: + """Extracts the sequence names from a FASTA file.""" + with open(fasta_name) as fasta: + return {line[1:].split(None, 1)[0] for line in fasta if line.startswith(">")} + +def tab_delimited(file: str) -> float: + """Check if file is tab-delimited and return median number of tabs.""" + with open(file, "r") as f: + data = f.read(102400) + return statistics.median(line.count("\t") for line in data.split("\n")) + +def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool) -> None: + """Filter GTF file based on FASTA sequence names.""" + if tab_delimited(gtf_in) != 8: + raise ValueError("Invalid GTF file: Expected 9 tab-separated columns.") + seq_names_in_genome = extract_fasta_seq_names(fasta) + print(f"Extracted chromosome sequence names from {fasta}") + print("All sequence IDs from FASTA: " + ", ".join(sorted(seq_names_in_genome))) + seq_names_in_gtf = set() + try: + with open(gtf_in) as gtf, open(filtered_gtf_out, "w") as out: + line_count = 0 + for line in gtf: + seq_name = line.split("\t")[0] + seq_names_in_gtf.add(seq_name) # Add sequence name to the set + if seq_name in seq_names_in_genome: + if skip_transcript_id_check or re.search(r'transcript_id "([^"]+)"', line): + out.write(line) + line_count += 1 + if line_count == 0: + raise ValueError("All GTF lines removed by filters") + except IOError as e: + print(f"File operation failed: {e}") + return + + print("All sequence IDs from GTF: " + ", ".join(sorted(seq_names_in_gtf))) + print(f"Extracted {line_count} matching sequences from {gtf_in} into {filtered_gtf_out}") + +filter_gtf(par["fasta"], par["gtf"], par["filtered_gtf"], par["skip_transcript_id_check"]) diff --git a/src/gtf_filter/test.sh b/src/gtf_filter/test.sh new file mode 100644 index 0000000..dcf621c --- /dev/null +++ b/src/gtf_filter/test.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +gunzip "$meta_resources_dir/genes.gtf.gz" + +echo ">>>Testing $metat_functionality_name" +"$meta_executable" \ + --fasta "$meta_resources_dir/genome.fasta" \ + --gtf "$meta_resources_dir/genes.gtf" \ + --filtered_gtf filtered_genes.gtf + +echo ">>> Check whether output exists" +[ ! -f "filtered_genes.gtf" ] && echo "Filtered GTF file does not exist!" && exit 1 +[ ! -s "filtered_genes.gtf" ] && echo "Filtered GTF file is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/gunzip/config.vsh.yaml b/src/gunzip/config.vsh.yaml new file mode 100644 index 0000000..c0738cb --- /dev/null +++ b/src/gunzip/config.vsh.yaml @@ -0,0 +1,42 @@ +name: "gunzip" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/gunzip/main.nf, modules/nf-core/gunzip/meta.yml] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: | + Compress or uncompress a file or list of files. + +argument_groups: +- name: "Input" + arguments: + - name: "--input" + type: file + required: true + description: Path of file to be uncompressed + +- name: "Output" + arguments: + - name: "--output" + type: file + direction: output + required: true + description: Decompressed file. + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/reference/genes.gff.gz + +engines: + - type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [ gzip ] +runners: + - type: executable + - type: nextflow diff --git a/src/gunzip/script.sh b/src/gunzip/script.sh new file mode 100755 index 0000000..8eee2d0 --- /dev/null +++ b/src/gunzip/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eo pipefail + +filename="$(basename -- "$par_input")" + +if [ ${filename##*.} == "gz" ]; then + gunzip -c $par_input > $par_output +else + cat $par_input > $par_output +fi diff --git a/src/gunzip/test.sh b/src/gunzip/test.sh new file mode 100644 index 0000000..b9f8690 --- /dev/null +++ b/src/gunzip/test.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# define input and output for script +input="$meta_resources_dir/genes.gff.gz" +output="genes.gff" + +# run executable and tests +echo "> Running $meta_functionality_name." + +"$meta_executable" \ + --input "$input" \ + --output "$output" + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Checking whether output can be found and has content" + +[ ! -f "$output" ] && echo "$output file missing" && exit 1 +[ ! -s "$output" ] && echo "$output file is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/multiqc_custom_biotype/biotypes_header.txt b/src/multiqc_custom_biotype/biotypes_header.txt new file mode 100644 index 0000000..dff52c0 --- /dev/null +++ b/src/multiqc_custom_biotype/biotypes_header.txt @@ -0,0 +1,11 @@ +# id: 'biotype_counts' +# section_name: 'Biotype Counts' +# description: "shows reads overlapping genomic features of different biotypes, +# counted by featureCounts." +# plot_type: 'bargraph' +# anchor: 'featurecounts_biotype' +# pconfig: +# id: "featurecounts_biotype_plot" +# title: "featureCounts: Biotypes" +# xlab: "# Reads" +# cpswitch_counts_label: "Number of Reads" diff --git a/src/multiqc_custom_biotype/config.vsh.yaml b/src/multiqc_custom_biotype/config.vsh.yaml new file mode 100644 index 0000000..bad8dc5 --- /dev/null +++ b/src/multiqc_custom_biotype/config.vsh.yaml @@ -0,0 +1,48 @@ +name: "multiqc_custom_biotype" +info: + migration_info: +description: Calculate features percentage for biotype counts + +argument_groups: +- name: "Input" + arguments: + - name: "--biocounts" + type: file + description: File with all biocounts + - name: "--id" + type: string + description: Sample name + default: $id + - name: "--features" + type: string + description: Features to count + default: rRNA + +- name: "Output" + arguments: + - name: '--featurecounts_multiqc' + type: file + direction: output + default: $id.biotype_counts_mqc.tsv + - name: '--featurecounts_rrna_multiqc' + type: file + direction: output + default: $id.biotype_counts_rrna_mqc.tsv + +resources: + # Adapted from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/mqc_features_stat.py + - type: python_script + path: script.py + # Include the header for the biotypes in the component + - path: biotypes_header.txt + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: python:latest +runners: + - type: executable + - type: nextflow diff --git a/src/multiqc_custom_biotype/script.py b/src/multiqc_custom_biotype/script.py new file mode 100755 index 0000000..b676a1b --- /dev/null +++ b/src/multiqc_custom_biotype/script.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import os + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + +mqc_main = """#id: 'biotype-gs' +#plot_type: 'generalstats' +#pconfig:""" + +mqc_pconf = """# percent_{ft}: +# title: '% {ft}' +# namespace: 'Biotype Counts' +# description: '% reads overlapping {ft} features' +# max: 100 +# min: 0 +# scale: 'RdYlGn-rev' +# format: '{{:.2f}}%'""" + + +def mqc_feature_stat(bfile, features, outfile, sname=None): + # If sample name not given use file name + if not sname: + sname = os.path.splitext(os.path.basename(bfile))[0] + + # Try to parse and read biocount file + fcounts = {} + try: + with open(bfile, "r") as bfl: + for ln in bfl: + if ln.startswith("#"): + continue + ft, cn = ln.strip().split("\t") + fcounts[ft] = float(cn) + except: + logger.error("Trouble reading the biocount file {}".format(bfile)) + return + + total_count = sum(fcounts.values()) + if total_count == 0: + logger.error("No biocounts found, exiting") + return + + # Calculate percentage for each requested feature + fpercent = {f: (fcounts[f] / total_count) * 100 if f in fcounts else 0 for f in features} + if len(fpercent) == 0: + logger.error("Any of given features '{}' not found in the biocount file".format(", ".join(features), bfile)) + return + + # Prepare the output strings + out_head, out_value, out_mqc = ("Sample", "'{}'".format(sname), mqc_main) + for ft, pt in fpercent.items(): + out_head = "{}\tpercent_{}".format(out_head, ft) + out_value = "{}\t{}".format(out_value, pt) + out_mqc = "{}\n{}".format(out_mqc, mqc_pconf.format(ft=ft)) + + # Write the output to a file + with open(outfile, "w") as ofl: + out_final = "\n".join([out_mqc, out_head, out_value]).strip() + ofl.write(out_final + "\n") + + +if __name__ == "__main__": + + # Read the biotypes_header.txt file + biotypes_header_path = os.path.join(meta["resources_dir"], 'biotypes_header.txt') + with open(biotypes_header_path, 'r') as header_file: + biotypes_header = header_file.read() + + # Extract specific columns (1 and 7) and skip the first two lines + filtered_lines = [] + with open(par["biocounts"], 'r') as biocounts_file: + for i, line in enumerate(biocounts_file): + if i >= 2: # Skipping first two lines + columns = line.strip().split('\t') + filtered_line = f"{columns[0]}\t{columns[6]}" # Columns 1 and 7 (0-indexed) + filtered_lines.append(filtered_line) + + # Concatenate the header and the processed lines + result = biotypes_header + '\n'.join(filtered_lines) + '\n' + + # Write the result to par_featurecounts_multiqc + with open(par["featurecounts_multiqc"], 'w') as output_file: + output_file.write(result) + + mqc_feature_stat(par["featurecounts_multiqc"], par["features"], par["featurecounts_rrna_multiqc"], par["id"]) diff --git a/src/multiqc_custom_biotype/test.sh b/src/multiqc_custom_biotype/test.sh new file mode 100644 index 0000000..f43c768 --- /dev/null +++ b/src/multiqc_custom_biotype/test.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +set -e + +echo "> Prepare test data" + +cat > "test.featurecounts.txt" << HERE +# Program:featureCounts v2.0.1; Command:"featureCounts" "-t" "CDS" "-T" "2" "-a" "genome.gtf" "-s" "1" "-o" "test.featureCounts.txt" "test.single_end.bam" +Geneid Chr Start End Strand Length test.single_end.bam +orf1ab MT192765.1;MT192765.1 259;13461 13461;21545 +;+ 21287 38 +S MT192765.1 21556 25374 + 3819 4 +ORF3a MT192765.1 25386 26210 + 825 0 +E MT192765.1 26238 26462 + 225 1 +M MT192765.1 26516 27181 + 666 1 +ORF6 MT192765.1 27195 27377 + 183 0 +ORF7a MT192765.1 27387 27749 + 363 0 +ORF7b MT192765.1 27749 27877 + 129 0 +ORF8 MT192765.1 27887 28249 + 363 0 +N MT192765.1 28267 29523 + 1257 2 +ORF10 MT192765.1 29551 29664 + 114 0 +HERE + +echo "> Run test" +"$meta_executable" \ + --biocounts "test.featurecounts.txt" \ + --id "test" \ + --featurecounts_multiqc "test.biotype_counts_mqc.tsv" \ + --featurecounts_rrna_multiqc "test.biotype_counts_rrna_mqc.tsv" + +echo "> Check results" +[ ! -f "test.biotype_counts_mqc.tsv" ] && echo "test.biotype_counts_mqc.tsv was not created" && exit 1 +[ ! -s "test.biotype_counts_mqc.tsv" ] && echo "test.biotype_counts_mqc.tsv is empty" && exit 1 +[ ! -f "test.biotype_counts_rrna_mqc.tsv" ] && echo "test.biotype_counts_rrna_mqc.tsv was not created" && exit 1 +[ ! -s "test.biotype_counts_rrna_mqc.tsv" ] && echo "test.biotype_counts_rrna_mqc.tsv is empty" && exit 1 + +exit 0 diff --git a/src/picard_markduplicates/config.vsh.yaml b/src/picard_markduplicates/config.vsh.yaml new file mode 100644 index 0000000..67f5069 --- /dev/null +++ b/src/picard_markduplicates/config.vsh.yaml @@ -0,0 +1,69 @@ +name: "picard_markduplicates" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/picard/markduplicates/main.nf, modules/nf-core/picard/markduplicates/meta.yml] + last_sha: 55398de6ab7577acfe9b1180016a93d7af7eb859 +description: | + Locate and tag duplicate reads in a BAM file + +argument_groups: +- name: "Input" + arguments: + - name: "--bam" + type: file + description: Input BAM file + - name: "--fasta" + type: file + description: Reference genome FASTA file + - name: "--fai" + type: file + description: Reference genome FASTA index + - name: "--extra_picard_args" + type: string + description: Additional argument to be passed to Picard MarkDuplicates + default: '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + +- name: "Output" + arguments: + - name: "--output_bam" + type: file + direction: output + description: BAM file with duplicate reads marked/removed + default: $id.MarkDuplicates.bam + - name: "--bai" + type: file + direction: output + description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag + default: $id.MarkDuplicates.bam.bai + must_exist: false + - name: "--metrics" + type: file + direction: output + description: Duplicate metrics file generated by picard + default: $id.MarkDuplicates.metrics.txt + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam + - path: /testData/unit_test_resources/sarscov2/genome.fasta + +engines: + - type: docker + image: ubuntu:22.04 + setup: + - type: docker + run: | + apt-get update && \ + apt-get install -y build-essential openjdk-17-jdk wget && \ + wget --no-check-certificate https://github.com/broadinstitute/picard/releases/download/3.1.1/picard.jar && \ + mv picard.jar /usr/local/bin + env: [ PICARD=/usr/local/bin/picard.jar ] +runners: + - type: executable + - type: nextflow diff --git a/src/picard_markduplicates/script.sh b/src/picard_markduplicates/script.sh new file mode 100755 index 0000000..2a288c4 --- /dev/null +++ b/src/picard_markduplicates/script.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eo pipefail + +avail_mem=3072 +if [ ! $meta_memory_mb ]; then + echo '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' +else + avail_mem=$(( $meta_memory_mb*0.8 )) +fi + +java -Xmx${avail_mem}M -jar $PICARD MarkDuplicates \ + $par_extra_picard_args \ + --INPUT $par_bam \ + --OUTPUT $par_output_bam \ + --REFERENCE_SEQUENCE $par_fasta \ + --METRICS_FILE $par_metrics diff --git a/src/picard_markduplicates/test.sh b/src/picard_markduplicates/test.sh new file mode 100644 index 0000000..fdaa5ed --- /dev/null +++ b/src/picard_markduplicates/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --bam "$meta_resources_dir/test.paired_end.sorted.bam" \ + --fasta "$meta_resources_dir/genome.fasta" \ + --extra_picard_args "--REMOVE_DUPLICATES false" \ + --output_bam "test.MarkDuplicates.genome.bam" \ + --metrics "test.MarkDuplicates.metrics.txt" + +echo ">>> Check whether output exists" +[ ! -f "test.MarkDuplicates.genome.bam" ] && echo "MarkDuplicates output BAM file does not exist!" && exit 1 +[ ! -s "test.MarkDuplicates.genome.bam" ] && echo "MarkDuplicates output BAM file is empty!" && exit 1 +[ ! -f "test.MarkDuplicates.metrics.txt" ] && echo "MarkDuplicates output metrics file does not exist!" && exit 1 +[ ! -s "test.MarkDuplicates.metrics.txt" ] && echo "MarkDuplicates output metrics file is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/prepare_multiqc_input/config.vsh.yaml b/src/prepare_multiqc_input/config.vsh.yaml new file mode 100644 index 0000000..5c7ce5f --- /dev/null +++ b/src/prepare_multiqc_input/config.vsh.yaml @@ -0,0 +1,149 @@ +name: "prepare_multiqc_input" +description: | + Prepare directory with all the input files for MultiQC. + +argument_groups: + - name: "Input" + arguments: + - name: "--fail_trimming_multiqc" + type: string + - name: "--fail_mapping_multiqc" + type: string + - name: "--fail_strand_multiqc" + type: string + - name: "--fastqc_raw_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--fastqc_trim_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--trim_log_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--sortmerna_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--star_multiqc" + type: file + multiple: true + multiple_sep: "," + # - name: "--hisat2_multiqc" + # type: file + # - name: "--rsem_multiqc" + # type: file + - name: "--salmon_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--samtools_stats" + type: file + multiple: true + multiple_sep: "," + - name: "--samtools_flagstat" + type: file + multiple: true + multiple_sep: "," + - name: "--samtools_idxstats" + type: file + multiple: true + multiple_sep: "," + - name: "--markduplicates_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--pseudo_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--featurecounts_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--featurecounts_rrna_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--aligner_pca_multiqc" + type: file + - name: "--aligner_clustering_multiqc" + type: file + - name: "--pseudo_aligner_pca_multiqc" + type: file + - name: "--pseudo_aligner_clustering_multiqc" + type: file + - name: "--preseq_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--qualimap_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--dupradar_output_dup_intercept_mqc" + type: file + multiple: true + multiple_sep: "," + - name: "--dupradar_output_duprate_exp_denscurve_mqc" + type: file + multiple: true + multiple_sep: "," + - name: "--bamstat_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--inferexperiment_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--innerdistance_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--junctionannotation_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--junctionsaturation_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--readdistribution_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--readduplication_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--tin_multiqc" + type: file + multiple: true + multiple_sep: "," + - name: "--multiqc_config" + type: file + description: | + Custom multiqc configuration file + + - name: "Ouput" + arguments: + - name: "--output" + type: file + direction: output + default: multiqc_input + +resources: + - type: bash_script + path: script.sh + - path: /src/assets/multiqc_config.yml + +engines: + - type: docker + image: ubuntu:22.04 + +runners: + - type: executable + - type: nextflow diff --git a/src/prepare_multiqc_input/script.sh b/src/prepare_multiqc_input/script.sh new file mode 100644 index 0000000..3aa7b6f --- /dev/null +++ b/src/prepare_multiqc_input/script.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +set -eo pipefail + +mkdir -p $par_output + +echo $par_fail_trimming_multiqc > $par_output/fail_trimming_mqc.tsv +echo $par_fail_mapping_multiqc > $par_output/fail_mapping_mqc.tsv +echo $par_fail_strand_multiqc > $par_output/fail_strand_mqc.tsv + +IFS="," read -ra fastqc_raw_multiqc <<< $par_fastqc_raw_multiqc && for file in "${fastqc_raw_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra fastqc_trim_multiqc <<< $par_fastqc_trim_multiqc && for file in "${fastqc_trim_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra trim_log_multiqc <<< $par_trim_log_multiqc && for file in "${trim_log_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra sortmerna_multiqc <<< $par_sortmerna_multiqc && for file in "${sortmerna_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra star_multiqc <<< $par_star_multiqc && for file in "${star_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +# IFS="," read -ra hisat2_multiqc <<< $par_hisat2_multiqc && for file in "${hisat2_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra rsem_multiqc <<< $par_rsem_multiqc && for file in "${rsem_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra salmon_multiqc <<< $par_salmon_multiqc && for file in "${salmon_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra pseudo_multiqc <<< $par_pseudo_multiqc && for file in "${pseudo_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra samtools_stats <<< $par_samtools_stats && for file in "${samtools_stats[@]}"; do [ -e "$file" ] && cp -r "$file" $par_output/; done + +IFS="," read -ra samtools_flagstat <<< $par_samtools_flagstat && for file in "${samtools_flagstat[@]}"; do [ -e "$file" ] && cp -r "$file" $par_output/; done + +IFS="," read -ra samtools_idxstats <<< $par_samtools_idxstats && for file in "${samtools_idxstats[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra markduplicates_multiqc <<< $par_markduplicates_multiqc && for file in "${markduplicates_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra featurecounts_multiqc <<< $par_featurecounts_multiqc && for file in "${featurecounts_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra featurecounts_rrna_multiqc <<< $par_featurecounts_rrna_multiqc && for file in "${featurecounts_rrna_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +[ -e "$par_aligner_pca_multiqc" ] && cp -r "$par_aligner_pca_multiqc" "$par_output/" + +[ -e "$par_aligner_clustering_multiqc" ] && cp -r $par_aligner_clustering_multiqc "$par_output/" + +[ -e "$par_pseudo_aligner_pca_multiqc" ] && cp -r $par_pseudo_aligner_pca_multiqc "$par_output/" + +[ -e "$par_pseudo_aligner_clustering_multiqc" ] && cp -r $par_pseudo_aligner_clustering_multiqc "$par_output/" + +IFS="," read -ra preseq_multiqc <<< $par_preseq_multiqc && for file in "${preseq_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra qualimap_multiqc <<< $par_qualimap_multiqc && for file in "${qualimap_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra dupradar_output_dup_intercept_mqc <<< $par_dupradar_output_dup_intercept_mqc && for file in "${dupradar_output_dup_intercept_mqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra dupradar_output_duprate_exp_denscurve_mqc <<< $par_dupradar_output_duprate_exp_denscurve_mqc && for file in "${dupradar_output_duprate_exp_denscurve_mqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra bamstat_multiqc <<< $par_bamstat_multiqc && for file in "${bamstat_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra inferexperiment_multiqc <<< $par_inferexperiment_multiqc && for file in "${inferexperiment_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra innerdistance_multiqc <<< $par_innerdistance_multiqc && for file in "${innerdistance_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra junctionannotation_multiqc <<< $par_junctionannotation_multiqc && for file in "${junctionannotation_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra junctionsaturation_multiqc <<< $par_junctionsaturation_multiqc && for file in "${junctionsaturation_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra readdistribution_multiqc <<< $par_readdistribution_multiqc && for file in "${readdistribution_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra readduplication_multiqc <<< $par_readduplication_multiqc && for file in "${readduplication_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +IFS="," read -ra tin_multiqc <<< $par_tin_multiqc && for file in "${tin_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done + +echo "Checking for custom multiqc_config" +# If the variable is empty, we use the default one (registered as a resource) +if [ -z $par_multiqc_config ]; then + echo "No multiqc_config provided, using the default" + cp $meta_resources_dir/multiqc_config.yml "$par_output" +else + echo "Optional file provided" + if [ -f $par_multiqc_config ]; then + cp $par_multiqc_config "$par_output"/multiqc_config.yml + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" diff --git a/src/preprocess_transcripts_fasta/config.vsh.yaml b/src/preprocess_transcripts_fasta/config.vsh.yaml new file mode 100644 index 0000000..3590bfe --- /dev/null +++ b/src/preprocess_transcripts_fasta/config.vsh.yaml @@ -0,0 +1,40 @@ +name: "preprocess_transcripts_fasta" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/preprocess_transcripts_fasta_gencode.nf] + last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d +description: | + Process transcripts FASTA if GTF file is GENOCODE format + +argument_groups: +- name: "Input" + arguments: + - name: "--transcript_fasta" + type: file + required: true + description: Path of transcripts FASTA file + +- name: "Output" + arguments: + - name: "--output" + type: file + direction: output + required: true + description: Path of processed output FASTA file. + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/reference/transcriptome.fasta + +engines: + - type: docker + image: ubuntu:22.04 +runners: + - type: executable + - type: nextflow diff --git a/src/preprocess_transcripts_fasta/script.sh b/src/preprocess_transcripts_fasta/script.sh new file mode 100644 index 0000000..1b966cf --- /dev/null +++ b/src/preprocess_transcripts_fasta/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eo pipefail + +filename="$(basename -- "$par_transcript_fasta")" + +if [ ${filename##*.} == "gz" ]; then + zcat $par_transcript_fasta | cut -d "|" -f1 > $par_output +else + cat $par_transcript_fasta | cut -d "|" -f1 > $par_output +fi diff --git a/src/preprocess_transcripts_fasta/test.sh b/src/preprocess_transcripts_fasta/test.sh new file mode 100644 index 0000000..419dc11 --- /dev/null +++ b/src/preprocess_transcripts_fasta/test.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --transcript_fasta "$meta_resources_dir/transcriptome.fasta" \ + --output "processed_transcriptome.fasta" + +echo ">>> Check whether output exists" +[ ! -f "processed_transcriptome.fasta" ] && echo "Processed FASTA file does not exist!" && exit 1 +[ ! -s "processed_transcriptome.fasta" ] && echo "Processed FASTA file is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/preseq_lcextrap/config.vsh.yaml b/src/preseq_lcextrap/config.vsh.yaml new file mode 100644 index 0000000..869f519 --- /dev/null +++ b/src/preseq_lcextrap/config.vsh.yaml @@ -0,0 +1,68 @@ +name: "preseq_lcextrap" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/preseq/lcextrap/main.nf, modules/nf-core/preseq/lcextrap/meta.yml] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: Computing the expected future yield of distinct reads and bounds on the number of total distinct reads in the library and the associated confidence intervals. + +argument_groups: +- name: "Input" + arguments: + - name: "--input" + type: file + description: Input genome BAM/BED file + - name: "--extra_preseq_args" + type: string + - name: "--paired" + type: boolean + description: Paired-end reads? + +- name: "Output" + arguments: + - name: "--output" + type: file + direction: output + default: $id.lc_extrap.txt + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/a.sorted.bed + - path: /testData/unit_test_resources/SRR1106616_5M_subset.bam + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [ curl, bzip2, build-essential, wget, gcc, autoconf, automake, make, libz-dev, libbz2-dev, zlib1g-dev, libncurses5-dev, libncursesw5-dev, liblzma-dev, pip ] + - type: docker + run: | + cd /usr/bin && \ + wget --no-check-certificate https://github.com/smithlabcode/preseq/releases/download/v3.2.0/preseq-3.2.0.tar.gz && \ + wget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && \ + wget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \ + curl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 -o samtools-1.18.tar.bz2 && \ + tar -xjf samtools-1.18.tar.bz2 && rm samtools-1.18.tar.bz2 && \ + tar -xzf preseq-3.2.0.tar.gz && rm preseq-3.2.0.tar.gz && \ + tar -vxjf htslib-1.9.tar.bz2 && rm htslib-1.9.tar.bz2 && \ + mv bedtools.static /usr/local/bin/bedtools && \ + chmod a+x /usr/local/bin/bedtools && \ + cd samtools-1.18 && \ + ./configure && \ + make && \ + make install && \ + cd /usr/bin && cd htslib-1.9 && \ + make && \ + cd /usr/bin && cd preseq-3.2.0 && \ + mkdir build && cd build && \ + ../configure && \ + make && make install && make HAVE_HTSLIB=1 all +runners: +- type: executable +- type: nextflow diff --git a/src/preseq_lcextrap/script.sh b/src/preseq_lcextrap/script.sh new file mode 100644 index 0000000..0ab4956 --- /dev/null +++ b/src/preseq_lcextrap/script.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -eo pipefail + +file=$(basename -- "$par_input") +filename="${file%.*}" + +if [ "${file##*.}" == "bam" ]; then + samtools sort -o sorted_$filename.bam -n $par_input + bedtools bamtobed -i sorted_$filename.bam > $filename.bed + bedtools sort -i $filename.bed > sorted_$filename.bed +elif [ "${file##*.}" == "bed" ]; then + bedtools sort -i $par_input > sorted_$filename.bed +else + echo "Invalid input file format!" + exit 1 +fi + +if $par_paired; then + paired="-pe" +else + paired="" +fi + +preseq lc_extrap \ + sorted_$filename.bed \ + $paired \ + $par_extra_preseq_args \ + -o $par_output diff --git a/src/preseq_lcextrap/test.sh b/src/preseq_lcextrap/test.sh new file mode 100644 index 0000000..3368d63 --- /dev/null +++ b/src/preseq_lcextrap/test.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +echo ">>> Testing with BAM input" +"$meta_executable" \ + --paired false \ + --input "$meta_resources_dir/SRR1106616_5M_subset.bam" \ + --output lc_extrap.txt + +echo ">>> Check whether output exists" +[ ! -f "lc_extrap.txt" ] && echo "Output file does not exist!" && exit 1 +[ ! -s "lc_extrap.txt" ] && echo "Output file is empty!" && exit 1 + +rm lc_extrap.txt + +echo ">>> Testing with BED input" +"$meta_executable" \ + --paired false \ + --input "$meta_resources_dir/a.sorted.bed" \ + --output lc_extrap.txt + +echo ">>> Check whether output exists" +[ ! -f "lc_extrap.txt" ] && echo "Output file does not exist!" && exit 1 +[ ! -s "lc_extrap.txt" ] && echo "Output file is empty!" && exit 1 + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/rsem_merge_counts/config.vsh.yaml b/src/rsem_merge_counts/config.vsh.yaml new file mode 100644 index 0000000..823bad7 --- /dev/null +++ b/src/rsem_merge_counts/config.vsh.yaml @@ -0,0 +1,60 @@ +name: "rsem_merge_counts" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/rsem_merge_counts/main.nf] + last_sha: 311279532694ce7520164ce4d65a388c0cd11f60 + +description: | + Merge the transcript quantification results obtained from rsem calculate-expression across all samples. + +argument_groups: +- name: "Input" + arguments: + - name: "--counts_gene" + type: file + description: Expression counts on gene level (genes) + - name: "--counts_transcripts" + type: file + description: Expression counts on transcript level (isoforms) + +- name: "Output" + arguments: + - name: "--merged_gene_counts" + type: file + description: File containing gene counts across all samples. + default: rsem.merged.gene_counts.tsv + direction: output + - name: "--merged_gene_tpm" + type: file + description: File containing gene TPM across all samples. + default: rsem.merged.gene_tpm.tsv + direction: output + - name: "--merged_transcript_counts" + type: file + description: File containing transcript counts across all samples. + default: rsem.merged.transcript_counts.tsv + direction: output + - name: "--merged_transcript_tpm" + type: file + description: File containing transcript TPM across all samples. + default: rsem.merged.transcript_tpm.tsv + direction: output + +resources: + - type: bash_script + path: script.sh + +# test_resources: +# - type: bash_script +# path: test.sh + # - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz + # - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz + +engines: + - type: docker + image: ubuntu:22.04 + +runners: + - type: executable + - type: nextflow diff --git a/src/rsem_merge_counts/script.sh b/src/rsem_merge_counts/script.sh new file mode 100644 index 0000000..524d44c --- /dev/null +++ b/src/rsem_merge_counts/script.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +set -ep pipefail + +mkdir -p tmp/genes +# cut -f 1,2 `ls $par_count_genes/*` | head -n 1` > gene_ids.txt +for file_id in ${par_count_genes[*]}; do + samplename=`basename $file_id | sed s/\\.genes.results\$//g` + echo $samplename > tmp/genes/${samplename}.counts.txt + cut -f 5 ${file_id} | tail -n+2 >> tmp/genes/${samplename}.counts.txt + echo $samplename > tmp/genes/${samplename}.tpm.txt + cut -f 6 ${file_id} | tail -n+2 >> tmp/genes/${samplename}.tpm.txt +done + +mkdir -p tmp/isoforms +# cut -f 1,2 `ls $par_counts_transcripts/*` | head -n 1` > transcript_ids.txt +for file_id in ${par_counts_transcripts[*]}; do + samplename=`basename $file_id | sed s/\\.isoforms.results\$//g` + echo $samplename > tmp/isoforms/${samplename}.counts.txt + cut -f 5 ${file_id} | tail -n+2 >> tmp/isoforms/${samplename}.counts.txt + echo $samplename > tmp/isoforms/${samplename}.tpm.txt + cut -f 6 ${file_id} | tail -n+2 >> tmp/isoforms/${samplename}.tpm.txt +done + +paste gene_ids.txt tmp/genes/*.counts.txt > $par_merged_gene_counts +paste gene_ids.txt tmp/genes/*.tpm.txt > $par_merged_gene_tpm +paste transcript_ids.txt tmp/isoforms/*.counts.txt > $par_merged_transcript_counts +paste transcript_ids.txt tmp/isoforms/*.tpm.txt > $par_merged_transcript_tpm diff --git a/src/rseqc/rseqc_junctionannotation/config.vsh.yaml b/src/rseqc/rseqc_junctionannotation/config.vsh.yaml new file mode 100644 index 0000000..f43a8f1 --- /dev/null +++ b/src/rseqc/rseqc_junctionannotation/config.vsh.yaml @@ -0,0 +1,108 @@ +name: "rseqc_junctionannotation" +namespace: "rseqc" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/rseqc/junctionannotation/main.nf] + last_sha: +description: | + Compare detected splice junctions to reference gene model. + +argument_groups: +- name: "Input" + arguments: + - name: "--input" + type: file + required: true + description: input alignment file in BAM or SAM format + + - name: "--refgene" + type: file + required: true + description: Reference gene model in bed format + + - name: "--map_qual" + type: integer + required: false + default: 30 + description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. + min: 0 + + - name: "--min_intron" + type: integer + required: false + default: 50 + min: 1 + description: Minimum intron length (bp), default = 50. + +- name: "Output" + arguments: + - name: "--output_log" + type: file + direction: output + required: false + default: $id.junction_annotation.log + description: output log of junction annotation script + + - name: "--output_plot_r" + type: file + direction: output + required: false + default: $id.junction_annotation_plot.r + description: r script to generate splice_junction and splice_events plot + + - name: "--output_junction_bed" + type: file + direction: output + required: false + default: $id.junction_annotation.bed + description: junction annotation file (bed format) + + - name: "--output_junction_interact" + type: file + direction: output + required: false + default: $id.junction_annotation.Interact.bed + description: interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization. + + - name: "--output_junction_sheet" + type: file + direction: output + required: false + default: $id.junction_annotation.xls + description: junction annotation file (xls format) + + - name: "--output_splice_events_plot" + type: file + direction: output + required: false + default: $id.splice_events.pdf + description: plot of splice events (pdf) + + - name: "--output_splice_junctions_plot" + type: file + direction: output + required: false + default: $id.splice_junctions_plot.pdf + description: plot of junctions (pdf) + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam + - path: /testData/unit_test_resources/sarscov2/test.bed12 + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [ python3-pip, r-base] + - type: python + packages: [ RSeQC ] +runners: +- type: executable +- type: nextflow diff --git a/src/rseqc/rseqc_junctionannotation/script.sh b/src/rseqc/rseqc_junctionannotation/script.sh new file mode 100644 index 0000000..9e4f287 --- /dev/null +++ b/src/rseqc/rseqc_junctionannotation/script.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -eo pipefail + +prefix=$(openssl rand -hex 8) +input="testData/unit_test_resources/test.paired_end.sorted.bam" +refgene="testData/unit_test_resources/test.bed" +junction_annotation.py \ + -i $par_input \ + -r $par_refgene \ + -o $prefix \ + -m $par_min_intron \ + -q $par_map_qual > $par_output_log + +[[ -f "$prefix.junction.bed" ]] && mv $prefix.junction.bed $par_output_junction_bed +[[ -f "$prefix.junction.Interact.bed" ]] && mv $prefix.junction.Interact.bed $par_output_junction_interact +[[ -f "$prefix.junction.xls" ]] && mv $prefix.junction.xls $par_output_junction_sheet +[[ -f "$prefix.junction_plot.r" ]] && mv $prefix.junction_plot.r $par_output_plot_r +[[ -f "$prefix.splice_events.pdf" ]] && mv $prefix.splice_events.pdf $par_output_splice_events_plot +[[ -f "$prefix.splice_junction.pdf" ]] && mv $prefix.splice_junction.pdf $par_output_splice_junctions_plot diff --git a/src/rseqc/rseqc_junctionannotation/test.sh b/src/rseqc/rseqc_junctionannotation/test.sh new file mode 100644 index 0000000..4033c15 --- /dev/null +++ b/src/rseqc/rseqc_junctionannotation/test.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# define input and output for script +input_bam="$meta_resources_dir/test.paired_end.sorted.bam" +input_bed="$meta_resources_dir/test.bed12" + +output_junction_bed="junction_annotation.bed" +output_junction_interact="junction_annotation.Interact.bed" +output_junction_sheet="junction_annotation.xls" +output_plot_r="junction_annotation_plot.r" +output_splice_events_plot="splice_events.pdf" +output_splice_junctions_plot="splice_junctions_plot.pdf" +output_log="junction_annotation.log" + +# run executable and test +echo "> Running $meta_functionality_name" + +"$meta_executable" \ + --input "$input_bam" \ + --refgene "$input_bed" \ + --output_log "$output_log" \ + --output_plot_r "$output_plot_r" \ + --output_junction_bed "$output_junction_bed" \ + --output_junction_interact "$output_junction_interact" \ + --output_junction_sheet "$output_junction_sheet" \ + --output_splice_events_plot "$output_splice_events_plot" \ + --output_splice_junctions_plot "$output_splice_junctions_plot" + +# exit_code=$? +# [[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Check if all output files were created" + +[ ! -f "$output_log" ] && echo "$output_log was not created" && exit 1 +[ ! -f "$output_junction_sheet" ] && echo "$output_junction_sheet was not created" && exit 1 +[ -s "$output_junction_sheet" ] && echo "$output_junction_sheet is not empty but should be" && exit 1 +[ ! -f "$output_plot_r" ] && echo "$output_plot_r was not created" && exit 1 +[ -s "$output_plot_r" ] && echo "$output_plot_r is not empty but should be" && exit 1 +# [ ! -f "$output_junction_bed" ] && echo "$output_junction_bed was not created" && exit 1 +# [ ! -s "$output_junction_bed" ] && echo "$output_junction_bed is empty" && exit 1 +# [ ! -f "$output_junction_interact" ] && echo "$output_junction_interact was not created" && exit 1 +# [ ! -s "$output_junction_interact" ] && echo "$output_junction_interact is empty" && exit 1 +# [ ! -f "$output_splice_events_plot" ] && echo "$output_splice_events_plot was not created" && exit 1 +# [ ! -s "$output_splice_events_plot" ] && echo "$output_splice_events_plot is empty" && exit 1 +# [ ! -f "$output_splice_junctions_plot" ] && echo "$output_splice_junctions_plot was not created" && exit 1 +# [ ! -s "$output_splice_junctions_plot" ] && echo "$output_splice_junctions_plot is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_junctionsaturation/config.vsh.yaml b/src/rseqc/rseqc_junctionsaturation/config.vsh.yaml new file mode 100644 index 0000000..09cbf6d --- /dev/null +++ b/src/rseqc/rseqc_junctionsaturation/config.vsh.yaml @@ -0,0 +1,105 @@ +name: "rseqc_junctionsaturation" +namespace: "rseqc" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/rseqc/junctionsaturation/main.nf] + last_sha: +description: | + Compare detected splice junctions to reference gene model. + +argument_groups: +- name: "Input" + arguments: + - name: "--input" + type: file + required: true + description: input alignment file in BAM or SAM format + + - name: "--refgene" + type: file + required: true + description: Reference gene model in bed format + + - name: "--sampling_percentile_lower_bound" + type: integer + required: false + default: 5 + description: Sampling starts from this percentile, must be an integer between 0 and 100, default =5. + min: 0 + max: 100 + + - name: "--sampling_percentile_upper_bound" + type: integer + required: false + default: 100 + description: Sampling ends at this percentile, must be an integer between 0 and 100, default =5. + min: 0 + max: 100 + + - name: "--sampling_percentile_step" + type: integer + required: false + default: 5 + description: Sampling frequency in %. Smaller value means more sampling times. Must be an integer between 0 and 100, default = 5. + min: 0 + max: 100 + + - name: "--min_intron" + type: integer + required: false + default: 50 + min: 1 + description: Minimum intron length (bp), default = 50. + + - name: "--min_splice_read" + type: integer + required: false + default: 1 + min: 1 + description: Minimum number of supporting reads to call a junction, default = 1. + + - name: "--map_qual" + type: integer + required: false + default: 30 + description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. + min: 0 + +- name: "Output" + arguments: + - name: "--output_plot_r" + type: file + direction: output + required: false + default: $id.junction_saturation_plot.r + description: r script to generate junction_saturation_plot plot + + - name: "--output_plot" + type: file + direction: output + required: false + default: $id.junction_saturation_plot.pdf + description: plot of junction saturation (pdf) + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam + - path: /testData/unit_test_resources/sarscov2/test.bed + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [ python3-pip, r-base] + - type: python + packages: [ RSeQC ] +runners: +- type: executable +- type: nextflow diff --git a/src/rseqc/rseqc_junctionsaturation/script.sh b/src/rseqc/rseqc_junctionsaturation/script.sh new file mode 100644 index 0000000..c896f73 --- /dev/null +++ b/src/rseqc/rseqc_junctionsaturation/script.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -eo pipefail + +prefix=$(openssl rand -hex 8) + +junction_saturation.py \ + -i $par_input \ + -r $par_refgene \ + -o $prefix \ + -l $par_sampling_percentile_lower_bound \ + -u $par_sampling_percentile_upper_bound \ + -s $par_sampling_percentile_step \ + -m $par_min_intron \ + -v $par_min_splice_read \ + -q $par_map_qual + +[[ -f "$prefix.junctionSaturation_plot.pdf" ]] && mv $prefix.junctionSaturation_plot.pdf $par_output_plot +[[ -f "$prefix.junctionSaturation_plot.r" ]] && mv $prefix.junctionSaturation_plot.r $par_output_plot_r diff --git a/src/rseqc/rseqc_junctionsaturation/test.sh b/src/rseqc/rseqc_junctionsaturation/test.sh new file mode 100644 index 0000000..6d8e566 --- /dev/null +++ b/src/rseqc/rseqc_junctionsaturation/test.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +gunzip "$meta_resources_dir/hg19_RefSeq.bed.gz" + +# define input and output for script +input_bam="$meta_resources_dir/test.paired_end.sorted.bam" +input_bed="$meta_resources_dir/test.bed" + +output_plot="junction_saturation_plot.pdf" +output_plot_r="junction_saturation_plot.r" + +# run executable and test +echo "> Running $meta_functionality_name" +"$meta_executable" \ + --input "$input_bam" \ + --refgene "$input_bed" \ + --output_plot_r "$output_plot_r" \ + --output_plot "$output_plot" + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> asserting all output files were created" + +[ ! -f "$output_plot_r" ] && echo "$output_plot_r was not created" && exit 1 +[ ! -s "$output_plot_r" ] && echo "$output_plot_r is empty" && exit 1 +[ ! -f "$output_plot" ] && echo "$output_plot was not created" && exit 1 +[ ! -s "$output_plot" ] && echo "$output_plot is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_readdistribution/config.vsh.yaml b/src/rseqc/rseqc_readdistribution/config.vsh.yaml new file mode 100644 index 0000000..c1a73b5 --- /dev/null +++ b/src/rseqc/rseqc_readdistribution/config.vsh.yaml @@ -0,0 +1,52 @@ +name: "rseqc_readdistribution" +namespace: "rseqc" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/rseqc/readdistribution/main.nf] + last_sha: +description: | + Calculate how mapped reads are distributed over genomic features. + +argument_groups: +- name: "Input" + arguments: + - name: "--input" + type: file + required: true + description: input alignment file in BAM or SAM format + + - name: "--refgene" + type: file + required: true + description: Reference gene model in bed format + +- name: "Output" + arguments: + - name: "--output" + type: file + direction: output + required: false + default: $id.read_distribution.txt + description: output file (txt) of read distribution analysis. + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam + - path: /testData/unit_test_resources/sarscov2/test.bed12 + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [ python3-pip ] + - type: python + packages: [ RSeQC ] +runners: +- type: executable +- type: nextflow diff --git a/src/rseqc/rseqc_readdistribution/script.sh b/src/rseqc/rseqc_readdistribution/script.sh new file mode 100644 index 0000000..6e0fad4 --- /dev/null +++ b/src/rseqc/rseqc_readdistribution/script.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -eo pipefail + +read_distribution.py \ + -i $par_input \ + -r $par_refgene \ +> $par_output diff --git a/src/rseqc/rseqc_readdistribution/test.sh b/src/rseqc/rseqc_readdistribution/test.sh new file mode 100644 index 0000000..dab1617 --- /dev/null +++ b/src/rseqc/rseqc_readdistribution/test.sh @@ -0,0 +1,24 @@ +#!/bin/bash + + +# define input and output for script +input_bam="$meta_resources_dir/test.paired_end.sorted.bam" +input_bed="$meta_resources_dir/test.bed12" +output="read_distribution.txt" + +# run executable and test +echo "> Running $meta_functionality_name" + +"$meta_executable" \ + --input "$input_bam" \ + --refgene "$input_bed" \ + --output "$output" + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Asserting output file was created" +[ ! -f "$output" ] && echo "$output was not created" && exit 1 +[ ! -f "$output" ] && echo "$output is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_readduplication/config.vsh.yaml b/src/rseqc/rseqc_readduplication/config.vsh.yaml new file mode 100644 index 0000000..5db8a72 --- /dev/null +++ b/src/rseqc/rseqc_readduplication/config.vsh.yaml @@ -0,0 +1,82 @@ +name: "rseqc_readduplication" +namespace: "rseqc" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/rseqc/readduplication/main.nf] + last_sha: +description: | + Calculate read duplication rate. + +argument_groups: +- name: "Input" + arguments: + - name: "--input" + type: file + required: true + description: input alignment file in BAM or SAM format + + - name: "--read_count_upper_limit" + type: integer + required: false + default: 500 + description: Upper limit of reads' occurence. Only used for plotting, default = 500 (times). + min: 1 + + - name: "--map_qual" + type: integer + required: false + default: 30 + description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. + min: 0 + +- name: "Output" + arguments: + - name: "--output_duplication_rate_plot_r" + type: file + direction: output + required: false + default: $id.duplication_rate_plot.r + description: R script for generating duplication rate plot + + - name: "--output_duplication_rate_plot" + type: file + direction: output + required: false + default: $id.duplication_rate_plot.pdf + description: duplication rate plot (pdf) + + - name: "--output_duplication_rate_mapping" + type: file + direction: output + required: false + default: $id.duplication_rate_mapping.xls + description: Summary of mapping-based read duplication + + - name: "--output_duplication_rate_sequence" + type: file + direction: output + required: false + default: $id.duplication_rate_sequencing.xls + description: Summary of sequencing-based read duplication + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: "apt" + packages: [python3-pip, r-base] + - type: python + packages: [RSeQC] +runners: +- type: executable +- type: nextflow diff --git a/src/rseqc/rseqc_readduplication/script.sh b/src/rseqc/rseqc_readduplication/script.sh new file mode 100644 index 0000000..4b9ce15 --- /dev/null +++ b/src/rseqc/rseqc_readduplication/script.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -eo pipefail + +prefix=$(openssl rand -hex 8) + +read_duplication.py \ + -i $par_input \ + -o $prefix \ + -u $par_read_count_upper_limit \ + -q $par_map_qual + +[[ -f "$prefix.DupRate_plot.pdf" ]] && mv $prefix.DupRate_plot.pdf $par_output_duplication_rate_plot +[[ -f "$prefix.DupRate_plot.r" ]] && mv $prefix.DupRate_plot.r $par_output_duplication_rate_plot_r +[[ -f "$prefix.pos.DupRate.xls" ]] && mv $prefix.pos.DupRate.xls $par_output_duplication_rate_mapping +[[ -f "$prefix.seq.DupRate.xls" ]] && mv $prefix.seq.DupRate.xls $par_output_duplication_rate_sequence diff --git a/src/rseqc/rseqc_readduplication/test.sh b/src/rseqc/rseqc_readduplication/test.sh new file mode 100644 index 0000000..8764b71 --- /dev/null +++ b/src/rseqc/rseqc_readduplication/test.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# define input and output for script +input_bam="$meta_resources_dir/test.paired_end.sorted.bam" + +output_duplication_rate_plot_r="duplication_rate_plot.r" +output_duplication_rate_plot="duplication_rate_plot.pdf" + +# run executable and test +echo "> Running $meta_functionality_name" +"$meta_executable" \ + --input "$input_bam" \ + --output_duplication_rate_plot_r "$output_duplication_rate_plot_r" \ + --output_duplication_rate_plot "$output_duplication_rate_plot" + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> asserting all output files were created" +[ ! -f "$output_duplication_rate_plot_r" ] && echo "$output_duplication_rate_plot_r was not created" && exit 1 +[ ! -s "$output_duplication_rate_plot_r" ] && echo "$output_duplication_rate_plot_r is empty" && exit 1 +[ ! -f "$output_duplication_rate_plot" ] && echo "$output_duplication_rate_plot was not created" && exit 1 +[ ! -s "$output_duplication_rate_plot" ] && echo "$output_duplication_rate_plot is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/rseqc/tin/config.vsh.yaml b/src/rseqc/tin/config.vsh.yaml new file mode 100644 index 0000000..f841533 --- /dev/null +++ b/src/rseqc/tin/config.vsh.yaml @@ -0,0 +1,85 @@ +name: "rseqc_tin" +namespace: "rseqc" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/rseqc/tin/main.nf] + last_sha: +description: | + Calculte TIN (transcript integrity number) from RNA-seq reads + +argument_groups: +- name: "Input" + arguments: + - name: "--bam_input" + type: file + required: true + description: Path to input alignment file in BAM or SAM format. + + - name: "--bai_input" + type: file + required: true + description: Path to bam index file in bai format. + + - name: "--refgene" + type: file + required: true + description: BED file containing the reference gene model + + - name: "--minimum_coverage" + type: integer + required: false + default: 10 + min: 1 + description: Minimum number of reads mapped to a transcript, default = 10. + + - name: "--sample_size" + type: integer + required: false + default: 100 + min: 1 + description: Number of equal-spaced nucleotide positions picked from mRNA. Note, if this number is larger than the length of mRNA (L), it will be halved until it's smaller than L (default = 100) + + - name: "--subtract_background" + type: boolean_true + description: Set flag to subtract background noise (estimated from intronic reads). Only use this option if there are substantial intronic reads. + +- name: "Output" + arguments: + - name: "--output_tin_summary" + type: file + direction: output + required: false + default: $id.tin_summary.txt + description: summary statistics (txt) of calculated TIN metrics + + - name: "--output_tin" + type: file + direction: output + required: false + default: $id.tin.xls + description: file with TIN metrics (xls) + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam + - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam.bai + - path: /testData/unit_test_resources/sarscov2/test.bed12 + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: "apt" + packages: [python3-pip] + - type: docker + run: | + pip3 install RSeQC +runners: +- type: executable +- type: nextflow diff --git a/src/rseqc/tin/script.sh b/src/rseqc/tin/script.sh new file mode 100644 index 0000000..2f06e7c --- /dev/null +++ b/src/rseqc/tin/script.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -eo pipefail + +bam_file="$(basename -- $par_bam_input)" +bai_file="$(basename -- $par_bai_input)" + +echo "$bam_file" +echo "$bai_file" + +tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "$tmpdir" +} + +cp $par_bam_input $tmpdir/$bam_file +cp $par_bai_input $tmpdir/$bai_file + +tin.py \ + -i $tmpdir/$bam_file \ + -r $par_refgene \ + -c $par_minimum_coverage \ + -n $par_sample_size \ + -s $par_subtract_background + +[[ -f "${bam_file%.*}.summary.txt" ]] && mv ${bam_file%.*}.summary.txt $par_output_tin_summary +[[ -f "${bam_file%.*}.tin.xls" ]] && mv ${bam_file%.*}.tin.xls $par_output_tin + +clean_up diff --git a/src/rseqc/tin/test.sh b/src/rseqc/tin/test.sh new file mode 100644 index 0000000..1330d1d --- /dev/null +++ b/src/rseqc/tin/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +gunzip "$meta_resources_dir/hg19_RefSeq.bed.gz" + +# define input and output for script +input_bam="$meta_resources_dir/test.paired_end.sorted.bam" +input_bai="$meta_resources_dir/test.paired_end.sorted.bam.bai" +input_bed="$meta_resources_dir/test.bed12" + +output_tin="tin.xls" +output_tin_summary="tin_summary.txt" + +# run executable and test +echo "> Running $meta_functionality_name" + +"$meta_executable" \ + --bam_input "$input_bam" \ + --bai_input "$input_bai" \ + --refgene "$input_bed" \ + --output_tin "$output_tin" \ + --output_tin_summary "$output_tin_summary" + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Check if all output files were created" + +[ ! -f $output_tin ] && echo "$output_tin was not created" && exit 1 +[ ! -s $output_tin ] && echo "$output_tin is empty" && exit 1 +[ ! -f $output_tin_summary ] && echo "$output_tin_summary was not created" && exit 1 +[ ! -s $output_tin_summary ] && echo "$output_tin_summary is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/sortmerna/config.vsh.yaml b/src/sortmerna/config.vsh.yaml new file mode 100644 index 0000000..3c1cd29 --- /dev/null +++ b/src/sortmerna/config.vsh.yaml @@ -0,0 +1,65 @@ +name: "sortmerna" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/sortmerna/main.nf, modules/nf-core/sortmerna/meta.yml] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: | + Local sequence alignment tool for filtering, mapping and clustering. The main application of SortMeRNA is filtering rRNA from metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart aligned and rejected reads into two files. + +argument_groups: +- name: "Input" + arguments: + - name: "--paired" + type: boolean + description: Are the reads single-end or paired-end + - name: "--input" + type: file + multiple: true + multiple_sep: "," + description: Input fastq + - name: "--ribo_database_manifest" + type: file + multiple: true + description: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. + +- name: "Output" + arguments: + - name: "--sortmerna_log" + type: file + direction: output + default: $id.sortmerna.log + required: false + must_exist: false + description: Sortmerna log file. + - name: "--fastq_1" + type: file + required: true + description: Output file for read 1. + direction: output + default: $id.$key.read_1.fastq.gz + - name: "--fastq_2" + type: file + required: false + must_exist: false + description: Output file for read 2. + direction: output + default: $id.$key.read_2.fastq.gz + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz + - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz + - path: /testData/minimal_test/reference/rRNA + +engines: +- type: docker + image: quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0 +runners: +- type: executable +- type: nextflow diff --git a/src/sortmerna/script.sh b/src/sortmerna/script.sh new file mode 100755 index 0000000..8ef7c2d --- /dev/null +++ b/src/sortmerna/script.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +set -eo pipefail + +IFS="," read -ra input <<< "$par_input" +IFS=";" read -ra paths <<< "$par_ribo_database_manifest" +refs="" +for i in "${paths[@]}" +do + refs+="-ref $i " +done + +if [ "$par_paired" == "false" ]; then + sortmerna \ + $refs \ + -reads ${input[0]} \ + --threads ${meta_cpus:-1} \ + --workdir . \ + --aligned rRNA_reads \ + --fastx \ + -num_alignments 1 \ + --other non_rRNA_reads + mv non_rRNA_reads.f*q.gz "$par_fastq_1" +else + sortmerna \ + $refs \ + -reads ${input[0]} \ + --reads ${input[1]} \ + --threads ${meta_cpus:-1} \ + --workdir . \ + --aligned rRNA_reads \ + --fastx \ + --num_alignments 1 \ + --other non_rRNA_reads \ + --paired_in \ + --out2 + mv non_rRNA_reads_fwd.f*q.gz $par_fastq_1 + mv non_rRNA_reads_rev.f*q.gz $par_fastq_2 +fi + +mv rRNA_reads.log $par_sortmerna_log + diff --git a/src/sortmerna/test.sh b/src/sortmerna/test.sh new file mode 100644 index 0000000..06ceb5d --- /dev/null +++ b/src/sortmerna/test.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +# find $meta_resources_dir/rRNA -type f > rrna-db-defaults.txt + +echo ">>> Testing for paired-end reads" +"$meta_executable" \ + --paired true \ + --input $meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz \ + --ribo_database_manifest "$meta_resources_dir/rRNA/silva-arc-16s-id95.fasta;$meta_resources_dir/rRNA/silva-euk-18s-id95.fasta" \ + --sortmerna_log SRR6357070_sortmerna.log \ + --fastq_1 SRR6357070_read_1.fastq.gz \ + --fastq_2 SRR6357070_read_2.fastq.gz + +echo ">> Checking if the correct files are present" +[[ ! -f "SRR6357070_read_1.fastq.gz" ]] || [[ ! -f "SRR6357070_read_2.fastq.gz" ]] && echo "Output fastq file is missing!" && exit 1 +[[ ! -s "SRR6357070_read_1.fastq.gz" ]] || [[ ! -s "SRR6357070_read_2.fastq.gz" ]] && echo "Output fastq file is empty!" && exit 1 +[ ! -f "SRR6357070_sortmerna.log" ] && echo "Output log file is missing!" && exit 1 +[ ! -s "SRR6357070_sortmerna.log" ] && echo "Output log file is empty!" && exit 1 + +rm SRR6357070_read_1.fastq.gz SRR6357070_read_2.fastq.gz SRR6357070_sortmerna.log +rm -rf kvdb/ + +echo ">>> Testing for single-end reads" +"$meta_executable" \ + --paired false \ + --input $meta_resources_dir/SRR6357070_1.fastq.gz \ + --ribo_database_manifest "$meta_resources_dir/rRNA/silva-arc-16s-id95.fasta;$meta_resources_dir/rRNA/silva-euk-18s-id95.fasta" \ + --sortmerna_log SRR6357070_sortmerna.log \ + --fastq_1 SRR6357070_read_1.fastq.gz + +echo ">> Checking if the correct files are present" +[ ! -f "SRR6357070_read_1.fastq.gz" ] && echo "Output fastq file is missing!" && exit 1 +[ ! -s "SRR6357070_read_1.fastq.gz" ] && echo "Output fastq file is empty!" && exit 1 +[ ! -f "SRR6357070_sortmerna.log" ] && echo "Output log file is missing!" && exit 1 +[ ! -s "SRR6357070_sortmerna.log" ] && echo "Output log file is empty!" && exit 1 + +echo ">>> Test finished successfully" +exit 0 + diff --git a/src/stringtie/config.vsh.yaml b/src/stringtie/config.vsh.yaml new file mode 100644 index 0000000..1a9cdab --- /dev/null +++ b/src/stringtie/config.vsh.yaml @@ -0,0 +1,70 @@ +name: stringtie +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/stringtie/stringtie/main.nf, modules/nf-core/stringtie/stringtie/meta.yml] + last_sha: 55398de6ab7577acfe9b1180016a93d7af7eb859 +description: | + Transcript assembly and quantification for RNA-Seq + +argument_groups: +- name: "Input" + arguments: + - name: "--strandedness" + type: string + description: Forward or reverse strand? + - name: "--bam" + type: file + - name: "--annotation_gtf" + type: file + - name: "--extra_stringtie_args" + type: string + description: Extra arguments for running StringTie + - name: "--stringtie_ignore_gtf" + type: boolean + description: Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file. + +- name: "Output" + arguments: + - name: "--transcript_gtf" + type: file + default: $id.$key.transcripts.gtf + direction: output + - name: "--coverage_gtf" + type: file + default: $id.$key.coverage.gtf + direction: output + - name: "--abundance" + type: file + default: $id.$key.abundance.txt + direction: output + - name: "--ballgown" + type: file + description: for running ballgown + default: $id.$key.ballgown + direction: output + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/genes.gtf + - path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: docker + run: | + apt-get update && \ + apt-get install -y build-essential zlib1g wget && \ + wget --no-check-certificate https://github.com/gpertea/stringtie/releases/download/v2.2.1/stringtie-2.2.1.Linux_x86_64.tar.gz && \ + tar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \ + cp stringtie-2.2.1.Linux_x86_64/stringtie /usr/local/bin/ +runners: +- type: executable +- type: nextflow diff --git a/src/stringtie/script.sh b/src/stringtie/script.sh new file mode 100644 index 0000000..ee3cbc6 --- /dev/null +++ b/src/stringtie/script.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -eo pipefail + +if [ $par_strandedness=='forward' ]; then + strand='--fr' +elif [ $par_strandedness=='reverse' ]; then + strand='--rf' +fi + +stringtie \ + $par_bam \ + $strand \ + ${par_annotation_gtf:+-G $par_annotation_gtf} \ + -o $par_transcript_gtf \ + -A "abundance.txt" \ + ${par_annotation_gtf:+-C "coverage.gtf"} \ + ${par_annotation_gtf:+-b "ballgown"} \ + -p ${meta_cpus:-1} \ + $par_extra_stringtie_args \ + ${par_stringtie_ignore_gtf:+-e} + +mv coverage.gtf $par_coverage_gtf +mv ballgown $par_ballgown +mv abundance.txt $par_abundance diff --git a/src/stringtie/test.sh b/src/stringtie/test.sh new file mode 100644 index 0000000..689b36a --- /dev/null +++ b/src/stringtie/test.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --strandedness reverse \ + --bam $meta_resources_dir/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam \ + --annotation_gtf $meta_resources_dir/genes.gtf \ + --extra_stringtie_args "-v" \ + --transcript_gtf test.transcripts.gtf \ + --coverage_gtf test.coverage.gtf \ + --abundance test.abundance.txt \ + --ballgown test.ballgown + +echo ">> Checking if the correct files are present" +[ ! -d "test.ballgown" ] && echo "Directory 'test.ballgown' does not exist!" && exit 1 +[ -z "$(ls -A 'test.ballgown')" ] && echo "Directory 'test.ballgown' is empty!" && exit 1 +[ ! -f "test.transcripts.gtf" ] && echo "File 'test.transcripts.gtf' does not exist!" && exit 1 +[ ! -s "test.transcripts.gtf" ] && echo "File 'test.transcripts.gtf' is empty!" && exit 1 +[ ! -f "test.coverage.gtf" ] && echo "File 'test.coverage.gtf' does not exist!" && exit 1 +[ ! -s "test.coverage.gtf" ] && echo "File 'test.coverage.gtf' is empty!" && exit 1 +[ ! -f "test.abundance.txt" ] && echo "File 'test.abundance.txt' does not exist!" && exit 1 +[ ! -s "test.abundance.txt" ] && echo "File 'test.abundance.txt' is empty!" && exit 1 + +echo ">>> Test finished successfully" +exit 0 diff --git a/src/summarizedexperiment/config.vsh.yaml b/src/summarizedexperiment/config.vsh.yaml new file mode 100644 index 0000000..f0968a3 --- /dev/null +++ b/src/summarizedexperiment/config.vsh.yaml @@ -0,0 +1,48 @@ +name: "summarizedexperiment" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/summarizedexperiment/main.nf] + last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d +description: Create SummarizedExperiment object from Salmon counts + +argument_groups: +- name: "Input" + arguments: + - name: "--tpm_gene" + type: file + - name: "--counts_gene" + type: file + - name: "--counts_gene_length_scaled" + type: file + - name: "--counts_gene_scaled" + type: file + - name: "--tpm_transcript" + type: file + - name: "--counts_transcript" + type: file + - name: "--tx2gene_tsv" + type: file + +- name: "Output" + arguments: + - name: "--output" + type: file + direction: output + default: merged_summarizedexperiment + +resources: + - type: bash_script + path: script.sh + # copied from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/salmon_summarizedexperiment.r + - path: summarizedexperiment.r + +engines: +- type: docker + image: rocker/r2u:22.04 + setup: + - type: r + bioc: [ SummarizedExperiment, tximeta ] +runners: +- type: executable +- type: nextflow diff --git a/src/summarizedexperiment/script.sh b/src/summarizedexperiment/script.sh new file mode 100755 index 0000000..0e16042 --- /dev/null +++ b/src/summarizedexperiment/script.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -eo pipefail + +mkdir -p $par_output + +Rscript "$meta_resources_dir/summarizedexperiment.r" NULL $par_counts_gene $par_tpm_gene $par_tx2gene_tsv + +Rscript "$meta_resources_dir/summarizedexperiment.r" NULL $par_counts_gene_length_scaled $par_tpm_gene $par_tx2gene_tsv + +Rscript "$meta_resources_dir/summarizedexperiment.r" NULL $par_counts_gene_scaled $par_tpm_gene $par_tx2gene_tsv + +Rscript "$meta_resources_dir/summarizedexperiment.r" NULL $par_counts_transcript $par_tpm_transcript $par_tx2gene_tsv + +mv ${par_counts_gene%.*}.rds $par_output/ +mv ${par_counts_gene_length_scaled%.*}.rds $par_output/ +mv ${par_counts_gene_scaled%.*}.rds $par_output/ +mv ${par_counts_transcript%.*}.rds $par_output/ diff --git a/src/summarizedexperiment/summarizedexperiment.r b/src/summarizedexperiment/summarizedexperiment.r new file mode 100755 index 0000000..5d656a7 --- /dev/null +++ b/src/summarizedexperiment/summarizedexperiment.r @@ -0,0 +1,68 @@ +#!/usr/bin/env Rscript + +library(SummarizedExperiment) + +## Create SummarizedExperiment (se) object from Salmon counts + +args <- commandArgs(trailingOnly = TRUE) +if (length(args) < 2) { + stop("Usage: salmon_se.r ", call. = FALSE) +} + +coldata <- args[1] +counts_fn <- args[2] +tpm_fn <- args[3] +tx2gene <- args[4] + +info <- file.info(tx2gene) +if (info$size == 0) { + tx2gene <- NULL +} else { + rowdata <- read.csv(tx2gene, sep = "\t", header = FALSE) + colnames(rowdata) <- c("tx", "gene_id", "gene_name") + tx2gene <- rowdata[, 1:2] +} + +counts <- read.csv(counts_fn, row.names = 1, sep = "\t") +counts <- counts[, 2:ncol(counts), drop = FALSE] # remove gene_name column +tpm <- read.csv(tpm_fn, row.names = 1, sep = "\t") +tpm <- tpm[, 2:ncol(tpm), drop = FALSE] # remove gene_name column + +if (length(intersect(rownames(counts), rowdata[["tx"]])) > length(intersect(rownames(counts), rowdata[["gene_id"]]))) { + by_what <- "tx" +} else { + by_what <- "gene_id" + rowdata <- unique(rowdata[, 2:3]) +} + +if (file.exists(coldata)) { + coldata <- read.csv(coldata, sep = "\t") + coldata <- coldata[match(colnames(counts), coldata[, 1]), ] + coldata <- cbind(files = fns, coldata) +} else { + message("ColData not avaliable ", coldata) + coldata <- data.frame(files = colnames(counts), names = colnames(counts)) +} + +rownames(coldata) <- coldata[["names"]] +extra <- setdiff(rownames(counts), as.character(rowdata[[by_what]])) +if (length(extra) > 0) { + rowdata <- rbind( + rowdata, + data.frame( + tx = extra, + gene_id = extra, + gene_name = extra + )[, colnames(rowdata)] + ) +} + +rowdata <- rowdata[match(rownames(counts), as.character(rowdata[[by_what]])), ] +rownames(rowdata) <- rowdata[[by_what]] +se <- SummarizedExperiment( + assays = list(counts = counts, abundance = tpm), + colData = DataFrame(coldata), + rowData = rowdata +) + +saveRDS(se, file = paste0(tools::file_path_sans_ext(counts_fn), ".rds")) diff --git a/src/tx2gene/config.vsh.yaml b/src/tx2gene/config.vsh.yaml new file mode 100644 index 0000000..727f659 --- /dev/null +++ b/src/tx2gene/config.vsh.yaml @@ -0,0 +1,55 @@ +name: "tx2gene" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/tx2gene/main.nf] + last_sha: 839ac5cab892504514cc96d44e99e70516b239d2 +description: Get transcript id (tx) to gene names for tximport + +argument_groups: +- name: "Input" + arguments: + - name: "--quant_results" + type: file + multiple: true + multiple_sep: ";" + - name: "--gtf" + type: file + - name: "--gtf_extra_attributes" + type: string + default: 'gene_name' + - name: "--gtf_group_features" + type: string + default: 'gene_id' + - name: "--quant_type" + type: string + description: Method used for quantification + choices: ["salmon", "kallisto"] + +- name: "Output" + arguments: + - name: "--tsv" + type: file + direction: output + default: tx2gene.tsv + +resources: + - type: bash_script + path: script.sh + # copied from https://github.com/nf-core/rnaseq/blob/3.14.0/bin/tx2gene.py + - path: tx2gene.py + +test_resources: + - type: bash_script + path: test.sh + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [pip, unzip] + - type: python +runners: +- type: executable +- type: nextflow diff --git a/src/tx2gene/script.sh b/src/tx2gene/script.sh new file mode 100755 index 0000000..33b5f6d --- /dev/null +++ b/src/tx2gene/script.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -eo pipefail + +function clean_up { + rm -rf "$tmpdir" +} +trap clean_up EXIT + +tmpdir=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXXXX") + +IFS=";" read -ra results <<< $par_quant_results +for result in ${results[*]} +do + cp -r $result $tmpdir +done + +python3 "$meta_resources_dir/tx2gene.py" \ + --quant_type $par_quant_type \ + --gtf $par_gtf \ + --quants $tmpdir \ + --id $par_gtf_group_features \ + --extra $par_gtf_extra_attributes \ + -o $par_tsv diff --git a/src/tx2gene/test.sh b/src/tx2gene/test.sh new file mode 100644 index 0000000..9077a00 --- /dev/null +++ b/src/tx2gene/test.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +set -e + +echo "> Prepare test data" + +cat > "sample1_quant_results.sf" << HERE +Name Length EffectiveLength TPM NumReads +ENSSASG00005000004 3822 3572 15216.8 753 +ENSSASG00005000003 13218 12968 1502.34 269.9 +ENSSASG00005000002 21290 21040 23916.3 6971.1 +HERE + +cat > "sample2_quant_results.sf" << HERE +Name Length EffectiveLength TPM NumReads +ENSSASG00005000004 3822 3572 23713.5 703 +ENSSASG00005000003 13218 12968 14280 1536.92 +ENSSASG00005000002 21290 21040 37447.4 6539.08 +HERE + +cat > "genes.gtf" << HERE +MN908947.3 ensembl transcript 266 21555 . + . gene_id "ENSSASG00005000002"; gene_version "1"; transcript_id "ENSSAST00005000002"; transcript_version "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1ab"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl exon 266 21555 . + . gene_id "ENSSASG00005000002"; gene_version "1"; transcript_id "ENSSAST00005000002"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1ab"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSSASE00005000002"; exon_version "1"; +MN908947.3 ensembl CDS 266 21552 . + 0 gene_id "ENSSASG00005000002"; gene_version "1"; transcript_id "ENSSAST00005000002"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1ab"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSSASP00005000002"; protein_version "1"; +MN908947.3 ensembl start_codon 266 268 . + 0 gene_id "ENSSASG00005000002"; gene_version "1"; transcript_id "ENSSAST00005000002"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1ab"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl stop_codon 21553 21555 . + 0 gene_id "ENSSASG00005000002"; gene_version "1"; transcript_id "ENSSAST00005000002"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1ab"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl transcript 266 13483 . + . gene_id "ENSSASG00005000003"; gene_version "1"; transcript_id "ENSSAST00005000003"; transcript_version "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1a"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl exon 266 13483 . + . gene_id "ENSSASG00005000003"; gene_version "1"; transcript_id "ENSSAST00005000003"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1a"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSSASE00005000003"; exon_version "1"; +MN908947.3 ensembl CDS 266 13480 . + 0 gene_id "ENSSASG00005000003"; gene_version "1"; transcript_id "ENSSAST00005000003"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1a"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSSASP00005000003"; protein_version "1"; +MN908947.3 ensembl start_codon 266 268 . + 0 gene_id "ENSSASG00005000003"; gene_version "1"; transcript_id "ENSSAST00005000003"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1a"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl stop_codon 13481 13483 . + 0 gene_id "ENSSASG00005000003"; gene_version "1"; transcript_id "ENSSAST00005000003"; transcript_version "1"; exon_number "1"; gene_name "ORF1ab"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "ORF1a"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl transcript 21563 25384 . + . gene_id "ENSSASG00005000004"; gene_version "1"; transcript_id "ENSSAST00005000004"; transcript_version "1"; gene_name "S"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "S"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl exon 21563 25384 . + . gene_id "ENSSASG00005000004"; gene_version "1"; transcript_id "ENSSAST00005000004"; transcript_version "1"; exon_number "1"; gene_name "S"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "S"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSSASE00005000004"; exon_version "1"; +MN908947.3 ensembl CDS 21563 25381 . + 0 gene_id "ENSSASG00005000004"; gene_version "1"; transcript_id "ENSSAST00005000004"; transcript_version "1"; exon_number "1"; gene_name "S"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "S"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSSASP00005000004"; protein_version "1"; +MN908947.3 ensembl start_codon 21563 21565 . + 0 gene_id "ENSSASG00005000004"; gene_version "1"; transcript_id "ENSSAST00005000004"; transcript_version "1"; exon_number "1"; gene_name "S"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "S"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +MN908947.3 ensembl stop_codon 25382 25384 . + 0 gene_id "ENSSASG00005000004"; gene_version "1"; transcript_id "ENSSAST00005000004"; transcript_version "1"; exon_number "1"; gene_name "S"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "S"; transcript_source "ensembl"; transcript_biotype "protein_coding"; +HERE + +echo "> Run test" +"$meta_executable" \ + --quant_results "sample1_quant_results.sf;sample2_quant_results.sf" \ + --gtf "genes.gtf" \ + --quant_type "salmon" \ + --gtf_extra_attributes "gene_name" \ + --gtf_group_features "gene_id" \ + --tsv "tx2gene.tsv" + +echo "> Check results" +[ ! -f "tx2gene.tsv" ] && echo "tx2gene.tsv was not created" && exit 1 +[ ! -s "tx2gene.tsv" ] && echo "tx2gene.tsv is empty" && exit 1 + +exit 0 diff --git a/src/tx2gene/tx2gene.py b/src/tx2gene/tx2gene.py new file mode 100755 index 0000000..dd50079 --- /dev/null +++ b/src/tx2gene/tx2gene.py @@ -0,0 +1,169 @@ + +#!/usr/bin/env python + +# Written by Lorena Pantano with subsequent reworking by Jonathan Manning. Released under the MIT license. + +import logging +import argparse +import glob +import os +import re +from collections import Counter, defaultdict, OrderedDict +from collections.abc import Set +from typing import Dict + +# Configure logging +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]: + """ + Read the top 100 transcripts from the quantification file. + + Parameters: + quant_dir (str): Directory where quantification files are located. + file_pattern (str): Pattern to match quantification files. + + Returns: + set: A set containing the top 100 transcripts. + """ + try: + # Find the quantification file within the directory + quant_file_path = glob.glob(os.path.join(quant_dir, file_pattern))[0] + with open(quant_file_path, "r") as file_handle: + # Read the file and extract the top 100 transcripts + return {line.split()[0] for i, line in enumerate(file_handle) if i > 0 and i <= 100} + except IndexError: + # Log an error and raise a FileNotFoundError if the quant file does not exist + logger.error("No quantification files found.") + raise FileNotFoundError("Quantification file not found.") + + +def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str: + """ + Discover the attribute in the GTF that corresponds to transcripts, prioritizing 'transcript_id'. + + Parameters: + gtf_file (str): Path to the GTF file. + transcripts (Set[str]): A set of transcripts to match in the GTF file. + + Returns: + str: The attribute name that corresponds to transcripts in the GTF file. + """ + + votes = Counter() + with open(gtf_file) as inh: + # Read GTF file, skipping header lines + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\t") + + # Use regular expression to correctly split the attributes string + attributes_str = cols[8] + attributes = dict(re.findall(r'(\S+) "(.*?)(? Dict[str, str]: + """ + Parse the attributes column of a GTF file. + + :param attributes_text: The attributes column as a string. + :return: A dictionary of the attributes. + """ + # Split the attributes string by semicolon and strip whitespace + attributes = attributes_text.strip().split(";") + attr_dict = OrderedDict() + + # Iterate over each attribute pair + for attribute in attributes: + # Split the attribute into key and value, ensuring there are two parts + parts = attribute.strip().split(" ", 1) + if len(parts) == 2: + key, value = parts + # Remove any double quotes from the value + value = value.replace('"', "") + attr_dict[key] = value + + return attr_dict + + +def map_transcripts_to_gene( + quant_type: str, gtf_file: str, quant_dir: str, gene_id: str, extra_id_field: str, output_file: str +) -> bool: + """ + Map transcripts to gene names and write the output to a file. + + Parameters: + quant_type (str): The quantification method used (e.g., 'salmon'). + gtf_file (str): Path to the GTF file. + quant_dir (str): Directory where quantification files are located. + gene_id (str): The gene ID attribute in the GTF file. + extra_id_field (str): Additional ID field in the GTF file. + output_file (str): The output file path. + + Returns: + bool: True if the operation was successful, False otherwise. + """ + # Read the top transcripts based on quantification type + transcripts = read_top_transcripts(quant_dir, "*quant_results.sf" if quant_type == "salmon" else "*abundance.tsv") + # Discover the attribute that corresponds to transcripts in the GTF + transcript_attribute = discover_transcript_attribute(gtf_file, transcripts) + + if not transcript_attribute: + # If no attribute is found, return False + return False + + # Open GTF and output file to write the mappings + # Initialize the set to track seen combinations + seen = set() + + with open(gtf_file) as inh, open(output_file, "w") as output_handle: + # Parse each line of the GTF, mapping transcripts to genes + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\t") + attr_dict = parse_attributes(cols[8]) + if gene_id in attr_dict and transcript_attribute in attr_dict: + # Create a unique identifier for the transcript-gene combination + transcript_gene_pair = (attr_dict[transcript_attribute], attr_dict[gene_id]) + + # Check if the combination has already been seen + if transcript_gene_pair not in seen: + # If it's a new combination, write it to the output and add to the seen set + extra_id = attr_dict.get(extra_id_field, attr_dict[gene_id]) + output_handle.write(f"{attr_dict[transcript_attribute]}\t{attr_dict[gene_id]}\t{extra_id}\n") + seen.add(transcript_gene_pair) + + return True + + +# Main function to parse arguments and call the mapping function +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Map transcripts to gene names for tximport.") + parser.add_argument("--quant_type", type=str, help="Quantification type", default="salmon") + parser.add_argument("--gtf", type=str, help="GTF file", required=True) + parser.add_argument("--quants", type=str, help="Output of quantification", required=True) + parser.add_argument("--id", type=str, help="Gene ID in the GTF file", required=True) + parser.add_argument("--extra", type=str, help="Extra ID in the GTF file") + parser.add_argument("-o", "--output", dest="output", default="tx2gene.tsv", type=str, help="File with output") + + args = parser.parse_args() + if not map_transcripts_to_gene(args.quant_type, args.gtf, args.quants, args.id, args.extra, args.output): + logger.error("Failed to map transcripts to genes.") diff --git a/src/tximport/config.vsh.yaml b/src/tximport/config.vsh.yaml new file mode 100644 index 0000000..6304033 --- /dev/null +++ b/src/tximport/config.vsh.yaml @@ -0,0 +1,79 @@ +name: "tximport" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/local/tximport/main.nf] + last_sha: 489bcb4efdc7bd58839b22b0360d26b4d80b87a8 +description: Get dataframe linking transcript ID, gene ID, and gene name + +argument_groups: +- name: "Input" + arguments: + - name: "--quant_results" + type: file + multiple: true + multiple_sep: ";" + - name: "--tx2gene_tsv" + type: file + - name: "--quant_type" + type: string + description: Method used for quantification + choices: ["salmon", "kallisto"] + +- name: "Output" + arguments: + - name: "--tpm_gene" + type: file + direction: output + default: merged.gene_tpm.tsv + - name: "--counts_gene" + type: file + direction: output + default: merged.gene_counts.tsv + - name: "--counts_gene_length_scaled" + type: file + direction: output + default: merged.gene_counts_length_scaled.tsv + - name: "--counts_gene_scaled" + type: file + direction: output + default: merged.gene_counts_scaled.tsv + - name: "--lengths_gene" + type: file + direction: output + default: merged.gene_lengths.tsv + - name: "--tpm_transcript" + type: file + direction: output + default: merged.transcript_tpm.tsv + - name: "--counts_transcript" + type: file + direction: output + default: merged.transcript_counts.tsv + - name: "--lengths_transcript" + type: file + direction: output + default: merged.transcript_lengths.tsv + +resources: + - type: bash_script + path: script.sh + # copied from https://github.com/nf-core/rnaseq/blob/3.14.0/bin/tximport.r + - path: tximport.r + +test_resources: + - type: bash_script + path: test.sh + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: [ r-base, libcurl4-openssl-dev, libssl-dev, libxml2-dev ] + - type: r + bioc: [ SummarizedExperiment, tximport, tximeta ] + cran: [ jsonlite ] +runners: +- type: executable +- type: nextflow diff --git a/src/tximport/script.sh b/src/tximport/script.sh new file mode 100755 index 0000000..69a5b86 --- /dev/null +++ b/src/tximport/script.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -eo pipefail + +function clean_up { + rm -rf "$tmpdir" +} +trap clean_up EXIT + +tmpdir=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXXXX") + +IFS=";" read -ra results <<< $par_quant_results +for result in ${results[*]} +do + cp -r $result $tmpdir +done + +Rscript "$meta_resources_dir/tximport.r" \ + NULL \ + $tmpdir \ + merged \ + $par_quant_type \ + $par_tx2gene_tsv diff --git a/src/tximport/test.sh b/src/tximport/test.sh new file mode 100644 index 0000000..c3af1b6 --- /dev/null +++ b/src/tximport/test.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +set -e + +echo "> Prepare test data" + +cat > "sample1_quant_results.sf" << HERE +Name Length EffectiveLength TPM NumReads +ENSSASG00005000004 3822 3572 15216.8 753 +ENSSASG00005000003 13218 12968 1502.34 269.9 +ENSSASG00005000002 21290 21040 23916.3 6971.1 +HERE + +cat > "sample2_quant_results.sf" << HERE +Name Length EffectiveLength TPM NumReads +ENSSASG00005000004 3822 3572 23713.5 703 +ENSSASG00005000003 13218 12968 14280 1536.92 +ENSSASG00005000002 21290 21040 37447.4 6539.08 +HERE + +cat > tx2gene.tsv << HERE +ENSSASG00005000002 ENSSASG00005000002 ORF1ab +ENSSASG00005000003 ENSSASG00005000003 ORF1ab +ENSSASG00005000004 ENSSASG00005000004 S +HERE + +echo "> Run test" +"$meta_executable" \ + --quant_results "sample1_quant_results.sf;sample2_quant_results.sf" \ + --tx2gene_tsv "tx2gene.tsv" \ + --quant_type "salmon" \ + # --tpm_gene gene_tpm.tsv \ + # --counts_gene gene_counts.tsv \ + # --counts_gene_length_scaled gene_counts_length_scaled.tsv \ + # --counts_gene_scaled gene_counts_scaled.tsv \ + # --lengths_gene gene_length.tsv \ + # --tpm_transcript transcript_tpm.tsv \ + # --counts_transcript transcript_counts.tsv \ + # --lengths_transcript transcript_length.tsv + +echo "> Check results" +[ ! -f "merged.gene_tpm.tsv" ] && echo "merged.gene_tpm.tsv was not created" && exit 1 +[ ! -s "merged.gene_tpm.tsv" ] && echo "merged.gene_tpm.tsv is empty" && exit 1 +[ ! -f "merged.gene_counts.tsv" ] && echo "merged.gene_counts.tsv was not created" && exit 1 +[ ! -s "merged.gene_counts.tsv" ] && echo "merged.gene_counts.tsv is empty" && exit 1 +[ ! -f "merged.gene_counts_length_scaled.tsv" ] && echo "merged.gene_counts_length_scaled.tsv was not created" && exit 1 +[ ! -s "merged.gene_counts_length_scaled.tsv" ] && echo "merged.gene_counts_length_scaled.tsv is empty" && exit 1 +[ ! -f "merged.gene_counts_scaled.tsv" ] && echo "merged.gene_counts_scaled.tsv was not created" && exit 1 +[ ! -s "merged.gene_counts_scaled.tsv" ] && echo "merged.gene_counts_scaled.tsv is empty" && exit 1 +[ ! -f "merged.gene_lengths.tsv" ] && echo "merged.gene_lengths.tsv was not created" && exit 1 +[ ! -s "merged.gene_lengths.tsv" ] && echo "merged.gene_lengths.tsv is empty" && exit 1 +[ ! -f "merged.transcript_tpm.tsv" ] && echo "merged.transcript_tpm.tsv was not created" && exit 1 +[ ! -s "merged.transcript_tpm.tsv" ] && echo "merged.transcript_tpm.tsv is empty" && exit 1 +[ ! -f "merged.transcript_counts.tsv" ] && echo "merged.transcript_counts.tsv was not created" && exit 1 +[ ! -s "merged.transcript_counts.tsv" ] && echo "merged.transcript_counts.tsv is empty" && exit 1 +[ ! -f "merged.transcript_lengths.tsv" ] && echo "merged.transcript_lengths.tsv was not created" && exit 1 +[ ! -s "merged.transcript_lengths.tsv" ] && echo "merged.transcript_lengths.tsv is empty" && exit 1 + +exit 0 \ No newline at end of file diff --git a/src/tximport/tximport.r b/src/tximport/tximport.r new file mode 100755 index 0000000..5036399 --- /dev/null +++ b/src/tximport/tximport.r @@ -0,0 +1,142 @@ +#!/usr/bin/env Rscript + +# Script for importing and processing transcript-level quantifications. +# Written by Lorena Pantano, later modified by Jonathan Manning, and released under the MIT license. + +# Loading required libraries +library(SummarizedExperiment) +library(tximport) + +# Parsing command line arguments +args <- commandArgs(trailingOnly=TRUE) +if (length(args) < 4) { + stop("Usage: tximport.r ", + call.=FALSE) +} + +# Assigning command line arguments to variables +coldata_path <- args[1] +path <- args[2] +prefix <- args[3] +quant_type <- args[4] +tx2gene_path <- args[5] + +## Functions + +# Build a table from a SummarizedExperiment object +build_table <- function(se.obj, slot) { + cbind(rowData(se.obj)[,1:2], assays(se.obj)[[slot]]) +} + +# Write a table to a file with given parameters +write_se_table <- function(params) { + file_name <- paste0(prefix, ".", params$suffix) + write.table(build_table(params$obj, params$slot), file_name, + sep="\t", quote=FALSE, row.names = FALSE) +} + +# Read transcript metadata from a given path +read_transcript_info <- function(tinfo_path){ + info <- file.info(tinfo_path) + if (info$size == 0) { + stop("tx2gene file is empty") + } + + transcript_info <- read.csv(tinfo_path, sep="\t", header = FALSE, + col.names = c("tx", "gene_id", "gene_name")) + + extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]])) + transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra)) + transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ] + rownames(transcript_info) <- transcript_info[["tx"]] + + list(transcript = transcript_info, + gene = unique(transcript_info[,2:3]), + tx2gene = transcript_info[,1:2]) +} + +# Read and process sample/column data from a given path +read_coldata <- function(coldata_path){ + if (file.exists(coldata_path)) { + coldata <- read.csv(coldata_path, sep="\t") + coldata <- coldata[match(names, coldata[,1]),] + coldata <- cbind(files = fns, coldata) + } else { + message("ColData not available: ", coldata_path) + coldata <- data.frame(files = fns, names = names) + } + rownames(coldata) <- coldata[["names"]] +} + +# Create a SummarizedExperiment object with given data +create_summarized_experiment <- function(counts, abundance, length, col_data, row_data) { + SummarizedExperiment(assays = list(counts = counts, abundance = abundance, length = length), + colData = col_data, + rowData = row_data) +} + +# Main script starts here + +# Define pattern for file names based on quantification type +pattern <- ifelse(quant_type == "kallisto", "abundance.tsv", ".*quant_results\\.sf") +fns <- list.files(path, pattern = pattern, recursive = T, full.names = T) +names <- basename(fns) +names(fns) <- names +dropInfReps <- quant_type == "kallisto" + +# Import transcript-level quantifications +txi <- tximport(fns, type = quant_type, txOut = TRUE, dropInfReps = dropInfReps) + +# Read transcript and sample data +transcript_info <- read_transcript_info(tx2gene_path) +coldata <- read_coldata(coldata_path) + +# Create initial SummarizedExperiment object +se <- create_summarized_experiment(txi[["counts"]], txi[["abundance"]], txi[["length"]], + DataFrame(coldata), transcript_info$transcript) + +# Setting parameters for writing tables +params <- list( + list(obj = se, slot = "abundance", suffix = "transcript_tpm.tsv"), + list(obj = se, slot = "counts", suffix = "transcript_counts.tsv"), + list(obj = se, slot = "length", suffix = "transcript_lengths.tsv") +) + +# Process gene-level data if tx2gene mapping is available +if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) { + tx2gene <- transcript_info$tx2gene + gi <- summarizeToGene(txi, tx2gene = tx2gene) + gi.ls <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM") + gi.s <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "scaledTPM") + + gene_info <- transcript_info$gene[match(rownames(gi[[1]]), transcript_info$gene[["gene_id"]]),] + rownames(gene_info) <- gene_info[["tx"]] + + col_data_frame <- DataFrame(coldata) + + # Create gene-level SummarizedExperiment objects + gse <- create_summarized_experiment(gi[["counts"]], gi[["abundance"]], gi[["length"]], + col_data_frame, gene_info) + gse.ls <- create_summarized_experiment(gi.ls[["counts"]], gi.ls[["abundance"]], gi.ls[["length"]], + col_data_frame, gene_info) + gse.s <- create_summarized_experiment(gi.s[["counts"]], gi.s[["abundance"]], gi.s[["length"]], + col_data_frame, gene_info) + + params <- c(params, list( + list(obj = gse, slot = "length", suffix = "gene_lengths.tsv"), + list(obj = gse, slot = "abundance", suffix = "gene_tpm.tsv"), + list(obj = gse, slot = "counts", suffix = "gene_counts.tsv"), + list(obj = gse.ls, slot = "abundance", suffix = "gene_tpm_length_scaled.tsv"), + list(obj = gse.ls, slot = "counts", suffix = "gene_counts_length_scaled.tsv"), + list(obj = gse.s, slot = "abundance", suffix = "gene_tpm_scaled.tsv"), + list(obj = gse.s, slot = "counts", suffix = "gene_counts_scaled.tsv") + )) +} + +# Writing tables for each set of parameters +done <- lapply(params, write_se_table) + +# Output session information and citations +# Removed for now because the 'tximeta' package is not found sometimes +# citation("tximeta") +sessionInfo() \ No newline at end of file diff --git a/src/ucsc/bedclip/config.vsh.yaml b/src/ucsc/bedclip/config.vsh.yaml new file mode 100644 index 0000000..907823d --- /dev/null +++ b/src/ucsc/bedclip/config.vsh.yaml @@ -0,0 +1,51 @@ +name: "bedclip" +namespace: "ucsc" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/ucsc/bedclip/main.nf, modules/nf-core/ucsc/bedclip/meta.yml] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: Remove lines from bed file that refer to off-chromosome locations + +argument_groups: +- name: "Input" + arguments: + - name: "--input_bedgraph" + type: file + description: bedGraph file which should be converted + - name: "--sizes" + type: file + description: File with chromosome sizes + +- name: "Output" + arguments: + - name: "--output_bedgraph" + type: file + direction: output + description: bedGraph file after clipping + default: $id.$key.bedgraph + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.bedgraph + - path: /testData/unit_test_resources/sarscov2/genome.sizes + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: + - rsync + - libcurl4 + - type: docker + run: | + rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip /usr/local/bin/ +runners: +- type: executable +- type: nextflow diff --git a/src/ucsc/bedclip/script.sh b/src/ucsc/bedclip/script.sh new file mode 100644 index 0000000..9bb7118 --- /dev/null +++ b/src/ucsc/bedclip/script.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -eo pipefail + +bedClip $par_input_bedgraph $par_sizes $par_output_bedgraph diff --git a/src/ucsc/bedclip/test.sh b/src/ucsc/bedclip/test.sh new file mode 100644 index 0000000..d84aeab --- /dev/null +++ b/src/ucsc/bedclip/test.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --input_bedgraph $meta_resources_dir/test.bedgraph \ + --sizes $meta_resources_dir/genome.sizes \ + --output_bedgraph test.clipped.bedgraph + +echo ">> Checking if the correct files are present" +[ ! -f "test.clipped.bedgraph" ] && echo "File 'test.clipped.bedgraph' does not exist!" && exit 1 +[ ! -s "test.clipped.bedgraph" ] && echo "File 'test.clipped.bedgraph' is empty!" && exit 1 + +echo ">>> Test finished successfully" +exit 0 diff --git a/src/ucsc/bedgraphtobigwig/config.vsh.yaml b/src/ucsc/bedgraphtobigwig/config.vsh.yaml new file mode 100644 index 0000000..49c52b5 --- /dev/null +++ b/src/ucsc/bedgraphtobigwig/config.vsh.yaml @@ -0,0 +1,51 @@ +name: "bedgraphtobigwig" +namespace: "ucsc" +info: + migration_info: + git_repo: https://github.com/nf-core/rnaseq.git + paths: [modules/nf-core/ucsc/bedgraphtobigwig/main.nf, modules/nf-core/ucsc/bedgraphtobigwig/meta.yml] + last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 +description: Convert a bedGraph file to bigWig format + +argument_groups: +- name: "Input" + arguments: + - name: "--bedgraph" + type: file + description: bedGraph file which should be converted + - name: "--sizes" + type: file + description: File with chromosome sizes + +- name: "Output" + arguments: + - name: "--bigwig" + type: file + direction: output + description: bigWig coverage file relative to genes on the input file + default: $id.$key.bigwig + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: /testData/unit_test_resources/sarscov2/test.bedgraph + - path: /testData/unit_test_resources/sarscov2/genome.sizes + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: + - rsync + - libcurl4 + - type: docker + run: | + rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/ +runners: +- type: executable +- type: nextflow diff --git a/src/ucsc/bedgraphtobigwig/script.sh b/src/ucsc/bedgraphtobigwig/script.sh new file mode 100644 index 0000000..24f52fc --- /dev/null +++ b/src/ucsc/bedgraphtobigwig/script.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -eo pipefail + +bedGraphToBigWig $par_bedgraph $par_sizes $par_bigwig diff --git a/src/ucsc/bedgraphtobigwig/test.sh b/src/ucsc/bedgraphtobigwig/test.sh new file mode 100644 index 0000000..deba55d --- /dev/null +++ b/src/ucsc/bedgraphtobigwig/test.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --bedgraph $meta_resources_dir/test.bedgraph \ + --sizes $meta_resources_dir/genome.sizes \ + --bigwig test.bigwig + +echo ">> Checking if the correct files are present" +[ ! -f "test.bigwig" ] && echo "File 'test.bigwig' does not exist!" && exit 1 +[ ! -s "test.bigwig" ] && echo "File 'test.bigwig' is empty!" && exit 1 + +echo ">>> Test finished successfully" +exit 0 diff --git a/src/workflows/genome_alignment_and_quant/config.vsh.yaml b/src/workflows/genome_alignment_and_quant/config.vsh.yaml new file mode 100644 index 0000000..af72c60 --- /dev/null +++ b/src/workflows/genome_alignment_and_quant/config.vsh.yaml @@ -0,0 +1,204 @@ +name: genome_alignment_and_quant +namespace: workflows +description: | + A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline. + +argument_groups: + - name: "Input" + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--fastq_1" + alternatives: [-i] + type: file + description: Path to the sample (or read 1 of paired end sample). + required: true + example: input.fastq.gz + - name: "--fastq_2" + type: file + required: false + description: Path to read 2 of the sample. + - name: "--strandedness" + type: string + required: false + description: Sample strand-specificity. Must be one of unstranded, forward, or reverse + choices: [forward, reverse, unstranded] + - name: "--gtf" + type: file + description: GTF file + - name: "--transcript_fasta" + type: file + description: Fasta file of the reference transcriptome. + - name: "--star_index" + type: file + description: STAR index directory. + - name: "--star_ignore_sjdbgtf" + type: boolean + default: false + description: When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file + - name: --star_sjdb_gtf_feature_exon + type: string + description: Feature type in GTF file to be used as exons for building transcripts + - name: "--bam_csi_index" + type: boolean + default: false + description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes. + - name: "--umi_dedup_stats" + type: boolean + description: Generate output stats when running "umi_tools dedup". + default: false + - name: "--with_umi" + type: boolean + description: Enable UMI-based read deduplication. + default: false + - name: "--salmon_quant_libtype" + type: string + description: Override Salmon library type inferred based on strandedness defined in meta object. + - name: "--extra_salmon_quant_args" + type: string + default: '' + description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline. + - name: "--gtf_group_features" + type: string + default: 'gene_id' + description: Define the attribute type used to group features in the GTF file when running Salmon. + - name: "--gtf_extra_attributes" + type: string + default: 'gene_name' + description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon. + - name: --extra_rsem_calculate_expression_args + type: string + description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline. + - name: "--aligner" + type: string + description: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'. + choices: [star_salmon, star_rsem, hisat2] + default: "star_salmon" + - name: "--rsem_index" + type: file + description: Path to directory for pre-built RSEM index. + - name: "--salmon_index" + type: file + description: Path to directory for pre-built Salmon index. + + - name: "Output" + arguments: + - name: "--star_multiqc" + type: file + direction: output + default: $id_star.log + - name: "--genome_bam_sorted" + type: file + direction: output + default: $id.genome.bam + - name: "--genome_bam_index" + type: file + direction: output + default: $id.genome.bam.bai + - name: "--genome_bam_stats" + type: file + direction: output + default: $id.genome.stats + - name: "--genome_bam_flagstat" + type: file + direction: output + default: $id.genome.flagstat + - name: "--genome_bam_idxstats" + type: file + direction: output + default: $id.genome.idxstats + - name: "--transcriptome_bam" + type: file + direction: output + default: $id.transcriptome.bam + - name: "--transcriptome_bam_index" + type: file + direction: output + default: $id.transcriptome.bam.bai + - name: "--transcriptome_bam_stats" + type: file + direction: output + default: $id.transcriptome.stats + - name: "--transcriptome_bam_flagstat" + type: file + direction: output + default: $id.transcriptome.flagstat + - name: "--transcriptome_bam_idxstats" + type: file + direction: output + default: $id.transcriptome.idxstats + - name: "--quant_out_dir" + type: file + direction: output + default: $id.salmon_quant + - name: "--quant_results_file" + type: file + direction: output + default: $id.quant.sf + - name: "--salmon_multiqc" + type: file + direction: output + - name: "--rsem_counts_gene" + type: file + description: Expression counts on gene level + default: $id.genes.results + direction: output + - name: "--counts_transcripts" + type: file + description: Expression counts on transcript level + default: $id.isoforms.results + direction: output + - name: "--rsem_multiqc" + type: file + description: RSEM statistics + default: $id.stat + direction: output + - name: "--bam_star_rsem" + type: file + description: BAM file generated by STAR (optional) + default: $id.STAR.genome.bam + direction: output + - name: "--bam_genome_rsem" + type: file + description: Genome BAM file (optional) + default: $id.genome.bam + direction: output + - name: "--bam_transcript_rsem" + type: file + description: Transcript BAM file (optional) + default: $id.transcript.bam + direction: output + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: star/star_align_reads + repository: biobox + - name: samtools/samtools_sort + repository: biobox + - name: samtools/samtools_index + repository: biobox + - name: samtools/samtools_stats + repository: biobox + - name: samtools/samtools_flagstat + repository: biobox + - name: samtools/samtools_idxstats + repository: biobox + - name: umi_tools/umi_tools_dedup + repository: biobox + - name: umi_tools/umi_tools_prepareforrsem + repository: biobox + - name: salmon/salmon_quant + repository: biobox + - name: rsem/rsem_calculate_expression + repository: biobox + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/genome_alignment_and_quant/main.nf b/src/workflows/genome_alignment_and_quant/main.nf new file mode 100644 index 0000000..70cdffd --- /dev/null +++ b/src/workflows/genome_alignment_and_quant/main.nf @@ -0,0 +1,431 @@ +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired, input: input ] ] + } + + | star_align_reads.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "fastq_1", + "input_r2": "fastq_2", + "genome_dir": "star_index", + "sjdb_gtf_file": "gtf", + "out_sam_attr_rg_line": "star_sam_attr_rg_line", + "sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ + "genome_bam": "aligned_reads", + "transcriptome_bam": "reads_aligned_to_transcriptome", + "star_multiqc": "log" + ], + args: [ + quant_mode: "TranscriptomeSAM", + twopass_mode: "Basic", + out_sam_type: "BAM;Unsorted", + run_rng_seed: 0, + out_filter_multimap_nmax: 20, + align_sjdb_overhang_min: 1, + out_sam_attributes: "NH;HI;AS;NM;MD", + quant_transcriptome_sam_output: "BanSingleEnd" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // GENOME BAM + | samtools_sort.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: ["input": "genome_bam"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // + // Remove duplicate reads from BAM file based on UMIs + // + + // Deduplicate genome BAM file + | umi_tools_dedup.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.genome_bam, + bai: state.genome_bam_index, + output_stats: output_stats] + }, + toState: [ "genome_bam_sorted": "output" ], + key: "genome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_deduped_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_deduped_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta", + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_deduped_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Deduplicate transcriptome BAM file + + | samtools_sort.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index", + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + | umi_tools_dedup.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.transcriptome_bam, + bai: state.transcriptome_bam_index, + output_stats: output_stats] + }, + toState: [ "transcriptome_bam_deduped": "output" ], + key: "transcriptome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_sort.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam_deduped" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_deduped_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_deduped_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_deduped_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_deduped_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_deduped_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Fix paired-end reads in name sorted BAM file + | umi_tools_prepareforrsem.run( + runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + // Count reads from BAM alignments using Salmon + | salmon_quant.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "lib_type": "lib_type", + "alignments": "transcriptome_bam", + "targets": "transcript_fasta", + "gene_map": "gtf" + ], + toState: [ + "quant_out_dir": "output", + "quant_results_file": "quant_results" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | rsem_calculate_expression.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "id": "id", + "strandedness": "strandedness", + "paired": "paired", + "input": "input", + "index": "rsem_index", + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts", + "stat": "rsem_multiqc", + "logs": "star_multiqc", + "bam_star": "bam_star_rsem", + "bam_genome": "bam_genome_rsem", + "bam_transcript": "bam_transcript_rsem" + ], + args: [ + star: true, + star_output_genome_bam: true, + star_gzipped_read_file: true, + estimate_rspd: true, + seed: 1 + ], + toState: [ + "rsem_counts_gene": "counts_gene", + "rsem_counts_transcripts": "counts_transcripts", + "rsem_multiqc": "stat", + "star_multiqc": "logs", + "bam_star_rsem": "bam_star", + "bam_genome_rsem": "bam_genome", + "bam_transcript_rsem": "bam_transcript" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // RSEM_Star BAM + | samtools_sort.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: ["input": "bam_star_rsem"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ "star_multiqc": "star_multiqc", + "rsem_multiqc": "rsem_multiqc", + "salmon_multiqc": "salmon_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "quant_out_dir": "quant_out_dir", + "quant_results_file": "quant_results_file", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "bam_genome_rsem": "bam_genome_rsem", + "bam_transcript_rsem": "bam_transcript_rsem" + ] + ) + + emit: + output_ch +} diff --git a/src/workflows/genome_alignment_and_quant/test_run.sh b/src/workflows/genome_alignment_and_quant/test_run.sh new file mode 100755 index 0000000..69ef4dc --- /dev/null +++ b/src/workflows/genome_alignment_and_quant/test_run.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +viash ns build --setup cb --parallel + +# Split error message from standard output +# viash ns list > /dev/null + +echo "> Preparing reference data files" +gunzip --keep testData/minimal_test/reference/genes.gtf.gz +mkdir -p testData/minimal_test/reference/rsem_index +tar -C testData/minimal_test/reference/rsem_index --strip-components 1 -xavf testData/minimal_test/reference/rsem.tar.gz --no-same-owner + +cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE +id,fastq_1,fastq_2,strandedness +WT_REP1,SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz,reverse +RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse +HERE + +# echo "> Test 1: STAR Salmon" +# nextflow run target/nextflow/workflows/genome_alignment_and_quant/main.nf \ +# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ +# --publish_dir test_results/genome_alignment_test1 \ +# --fasta testData/minimal_test/reference/genome.fasta \ +# --gtf testData/minimal_test/reference/genes.gtf \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --star_index test_results/output_test1/STAR_index \ +# --aligner star_salmon \ +# -profile docker \ +# -resume + +echo "> Test 2: STAR RSEM" +nextflow run target/nextflow/workflows/genome_alignment_and_quant/main.nf \ + --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ + --publish_dir test_results/genome_alignment_test2 \ + --fasta testData/minimal_test/reference/genome.fasta \ + --gtf testData/minimal_test/reference/genes.gtf \ + --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ + --rsem_index testData/minimal_test/reference/rsem_index \ + --aligner star_rsem \ + -profile docker \ + -resume + +echo "Removing reference data files" +rm testData/minimal_test/reference/genes.gtf diff --git a/src/workflows/merge_quant_results/config.vsh.yaml b/src/workflows/merge_quant_results/config.vsh.yaml new file mode 100644 index 0000000..a4809cd --- /dev/null +++ b/src/workflows/merge_quant_results/config.vsh.yaml @@ -0,0 +1,84 @@ +name: merge_quant_results +namespace: workflows +description: A sub-workflow to merge the counts obtained from salmon quant across all samples. + +argument_groups: + - name: "Input" + arguments: + - name: "--salmon_quant_results" + type: file + multiple: true + multiple_sep: "," + - name: "--kallisto_quant_results" + type: file + multiple: true + multiple_sep: "," + - name: "--gtf" + type: file + - name: "--gtf_extra_attributes" + type: string + default: 'gene_name' + - name: "--gtf_group_features" + type: string + default: 'gene_id' + - name: "--quant_type" + type: string + description: Quantification method used. + choices: [salmon, kallisto] + default: salmon + - name: "--versions" + type: file + must_exist: false + + - name: "Output" + arguments: + - name: "--tpm_gene" + type: file + direction: output + example: gene_tpm.tsv + - name: "--counts_gene" + type: file + direction: output + example: gene_counts.tsv + - name: "--counts_gene_length_scaled" + type: file + direction: output + example: gene_counts_length_scaled.tsv + - name: "--counts_gene_scaled" + type: file + direction: output + example: gene_counts_scaled.tsv + - name: "--tpm_transcript" + type: file + direction: output + example: transcript_tpm.tsv + - name: "--lengths_gene" + type: file + direction: output + example: gene_length.tsv + - name: "--counts_transcript" + type: file + direction: output + example: transcript_counts.tsv + - name: "--lengths_transcript" + type: file + direction: output + example: transcript_length.tsv + - name: "--quant_merged_summarizedexperiment" + type: file + direction: output + example: quant_merged_summarizedexperiment + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: tx2gene + - name: tximport + - name: summarizedexperiment + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/merge_quant_results/main.nf b/src/workflows/merge_quant_results/main.nf new file mode 100644 index 0000000..185e736 --- /dev/null +++ b/src/workflows/merge_quant_results/main.nf @@ -0,0 +1,67 @@ +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def quant_results = state.quant_type == 'kallisto' ? state.kallisto_quant_results : state.salmon_quant_results + [id, state + [quant_results: quant_results]] + } + + | tx2gene.run ( + fromState: [ + "quant_results": "quant_results", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf": "gtf", + "gtf_group_features": "gtf_group_features", + "quant_type": "quant_type" + ], + toState: [ "tx2gene_tsv": "tsv" ] + ) + + | tximport.run ( + fromState: [ + "quant_results": "quant_results", + "tx2gene_tsv": "tx2gene_tsv", + "quant_type": "quant_type" + ], + toState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "length_gene": "length_gene", + "length_transcript": "length_transcript" + ] + ) + + | summarizedexperiment.run ( + fromState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "tx2gene_tsv": "tx2gene_tsv" + ], + toState: [ "quant_merged_summarizedexperiment": "output" ] + ) + + | setState ( + [ "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" ] + ) + + emit: + output_ch +} \ No newline at end of file diff --git a/src/workflows/merge_quant_results/test_run.sh b/src/workflows/merge_quant_results/test_run.sh new file mode 100644 index 0000000..e69de29 diff --git a/src/workflows/post_processing/config.vsh.yaml b/src/workflows/post_processing/config.vsh.yaml new file mode 100644 index 0000000..6a6d539 --- /dev/null +++ b/src/workflows/post_processing/config.vsh.yaml @@ -0,0 +1,157 @@ +name: post_processing +namespace: workflows +description: | + A viash sub-workflow for the post-processing stage of nf-core/rnaseq pipeline. + +argument_groups: + - name: "Input" + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--strandedness" + type: string + required: false + default: auto + description: Sample strand-specificity. Must be one of unstranded, forward, reverse or auto + - name: "--paired" + type: boolean + description: Paired fastq files or not? + - name: "--fasta" + type: file + description: Path to FASTA genome file. + required: true + - name: "--fai" + type: file + description: Path to FASTA index + required: true + - name: "--gtf" + type: file + description: GTF file + - name: "--genome_bam" + type: file + description: Genome BAM file + - name: "--chrom_sizes" + type: file + description: File containing chromosome lengths + - name: "--star_multiqc" + type: file + description: STAR align log file. + - name: "--extra_picard_args" + type: string + default: '' + description: Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline. + - name: "--extra_stringtie_args" + type: string + default: '' + description: Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline. + - name: "--stringtie_ignore_gtf" + type: boolean + description: Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file. + - name: "--bam_csi_index" + type: boolean + default: false + description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes. + - name: "--min_mapped_reads" + type: integer + description: Minimum percentage of uniquely mapped reads below which samples are removed from further processing. + - name: "--with_umi" + type: boolean + description: Enable UMI-based read deduplication. + default: false + - name: "--skip_qc" + type: boolean + - name: "--skip_markduplicates" + type: boolean + - name: "--skip_stringtie" + type: boolean + - name: "--skip_bigwig" + type: boolean + + - name: "Output" + arguments: + - name: "--processed_genome_bam" + type: file + direction: output + default: $id.genome.bam + - name: "--genome_bam_index" + type: file + direction: output + default: $id.genome.bam.bai + - name: "--genome_bam_stats" + type: file + direction: output + default: $id.genome.stats + - name: "--genome_bam_flagstat" + type: file + direction: output + default: $id.genome.flagstat + - name: "--genome_bam_idxstats" + type: file + direction: output + default: $id.genome.idxstats + - name: "--markduplicates_metrics" + type: file + direction: output + default: $id.MarkDuplicates.metrics.txt + - name: "--stringtie_transcript_gtf" + type: file + direction: output + default: $id.stringtie.transcripts.gtf + - name: "--stringtie_coverage_gtf" + type: file + direction: output + default: $id.stringtie.coverage.gtf + - name: "--stringtie_abundance" + type: file + direction: output + default: $id.stringtie.gene_abundance.txt + - name: "--stringtie_ballgown" + type: file + direction: output + default: $id.stringtie.ballgown + - name: "--bedgraph_forward" + type: file + direction: output + default: $id.forward.bedgraph + - name: "--bedgraph_reverse" + type: file + direction: output + default: $id.reverse.bedgraph + - name: "--bigwig_forward" + type: file + direction: output + default: $id.forward.bigwig + - name: "--bigwig_reverse" + type: file + direction: output + default: $id.reverse.bigwig + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: picard_markduplicates + - name: samtools/samtools_sort + repository: biobox + - name: samtools/samtools_index + repository: biobox + - name: samtools/samtools_stats + repository: biobox + - name: samtools/samtools_flagstat + repository: biobox + - name: samtools/samtools_idxstats + repository: biobox + - name: stringtie + - name: bedtools/bedtools_genomecov + repository: biobox + - name: ucsc/bedclip + - name: ucsc/bedgraphtobigwig + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/post_processing/main.nf b/src/workflows/post_processing/main.nf new file mode 100644 index 0000000..844ec9d --- /dev/null +++ b/src/workflows/post_processing/main.nf @@ -0,0 +1,192 @@ +// Note: some helper functionality is added at the end of this file + +workflow run_wf { + take: + input_ch + + main: + + output_ch = input_ch + + | picard_markduplicates.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "genome_bam", + "fasta": "fasta", + "fai": "fai", + "extra_picard_args": "extra_picard_args" + ], + toState: [ + "processed_genome_bam": "output_bam", + "markduplicates_metrics": "metrics" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_sort.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ "input": "processed_genome_bam" ], + toState: [ "processed_genome_bam": "output" ], + key: "genome_sorted_MarkDuplicates", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_index.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "input": "processed_genome_bam", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted_MarkDuplicates", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_stats.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "input": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_stats": "output" ], + key: "MarkDuplicates_stats", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_flagstat.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "MarkDuplicates_flagstat", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "MarkDuplicates_idxstats", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | stringtie.run ( + runIf: { id, state -> !state.skip_stringtie }, + fromState: [ + "strandedness": "strandedness", + "bam": "processed_genome_bam", + "annotation_gtf": "gtf", + "extra_stringtie_args": "extra_stringtie_args" + ], + toState: [ + "stringtie_transcript_gtf": "transcript_gtf", + "stringtie_coverage_gtf": "coverage_gtf", + "stringtie_abundance": "abundance", + "stringtie_ballgown": "ballgown" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Genome-wide coverage with BEDTools + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "+" + ], + toState: [ "bedgraph_forward": "output" ], + key: "bedtools_genomecov_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "-" + ], + toState: [ "bedgraph_reverse": "output" ], + key: "bedtools_genomecov_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedclip.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bedgraph": "bedgraph_forward", + "sizes": "chrom_sizes" + ], + toState: [ "bedgraph_forward": "output_bedgraph" ], + key: "bedclip_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedgraphtobigwig.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "bedgraph": "bedgraph_forward", + "sizes": "chrom_sizes" + ], + toState: [ "bigwig_forward": "bigwig" ], + key: "bedgraphtobigwig_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedclip.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bedgraph": "bedgraph_reverse", + "sizes": "chrom_sizes", + ], + toState: [ "bedgraph_reverse": "output_bedgraph" ], + key: "bedclip_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedgraphtobigwig.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "bedgraph": "bedgraph_reverse", + "sizes": "chrom_sizes" + ], + toState: [ "bigwig_reverse": "bigwig" ], + key: "bedgraphtobigwig_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "processed_genome_bam": "processed_genome_bam", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse" + ) + + emit: + output_ch +} diff --git a/src/workflows/post_processing/test_run.sh b/src/workflows/post_processing/test_run.sh new file mode 100755 index 0000000..09595d5 --- /dev/null +++ b/src/workflows/post_processing/test_run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +viash ns build --setup cb --parallel + +nextflow run target/nextflow/workflows/post_processing/main.nf \ + --publish_dir "testData/paired_end_test" \ + --id SRR6357070 \ + --paired true \ + --strandedness unstranded \ + --fasta "testData/test_output/reference_genome.fasta" \ + --fai "testData/test_output/reference_genome.fasta.fai" \ + --gtf "testData/test_output/gene_annotation.gtf" \ + --genome_bam "testData/paired_end_test/SRR6357070.genome.bam" \ + --chrom_sizes "testData/test_output/reference_genome.fasta.sizes" \ + --star_multiqc "testData/paired_end_test/SRR6357070.star_align.log" \ + --extra_picard_args "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" \ + --gencode false \ + --biotype gene_biotype \ + -profile docker \ diff --git a/src/workflows/pre_processing/config.vsh.yaml b/src/workflows/pre_processing/config.vsh.yaml new file mode 100644 index 0000000..fadf259 --- /dev/null +++ b/src/workflows/pre_processing/config.vsh.yaml @@ -0,0 +1,263 @@ +name: pre_processing +namespace: workflows +description: | + A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline. +argument_groups: + - name: Inputs + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--fastq_1" + type: file + description: Path to the sample (or read 1 of paired end sample). + required: true + must_exist: true + example: input.fastq.gz + - name: "--fastq_2" + type: file + required: false + must_exist: false + description: Path to read 2 of the sample. + - name: "--strandedness" + type: string + required: false + default: auto + description: Sample strand-specificity. Must be one of unstranded, forward, reverse or auto + - name: "--bbsplit_index" + type: file + description: BBsplit index + - name: "--ribo_database_manifest" + type: file + description: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. + - name: "--transcript_fasta" + type: file + description: Path to FASTA transcriptome file. + - name: "--gtf" + type: file + description: Path to GTF annotation file. + - name: "--salmon_index" + type: file + description: Path to directory containing the Salmon index + - name: "--num_trimmed_reads" + type: integer + description: Number of reads after trimming + + - name: "Extra pipeline options" + arguments: + - name: "--skip_qc" + type: boolean + description: Skip QC steps of the workflow. + + - name: "FastQC options" + arguments: + - name: "--skip_fastqc" + type: boolean + description: Skip FatQC step. + default: false + + - name: "UMI-tools options" + arguments: + - name: "--with_umi" + type: boolean + description: Enable UMI-based read deduplication. + default: false + - name: "--skip_umi_extract" + type: boolean + description: Skip umi_tools extract step. + default: false + - name: "--umitools_extract_method" + type: string + description: UMI pattern to use. + default: string + choices: [string, regex] + - name: "--umitools_bc_pattern" + type: string + description: The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI. + default: "" + - name: "--umitools_bc_pattern2" + type: string + description: The UMI barcode pattern to use if the UMI is located in read 2. + default: "" + - name: "--umi_discard_read" + type: integer + description: After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively. + choices: [0, 1, 2] + default: 0 + - name: "--umitools_umi_separator" + type: string + description: The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software. + default: "_" + - name: "--umitools_grouping_method" + type: string + description: Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently. + choices: [ "unique", "percentile", "cluster", "adjacency", "directional" ] + default: "directional" + - name: "--save_umi_intermeds" + type: boolean + description: If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory. + default: false + + - name: "Read trimming options" + arguments: + - name: "--trimmer" + type: string + description: Specify the trimming tool to use. + choices: [ "trimgalore", "fastp"] + default: "trimgalore" + - name: "--min_trimmed_reads" + type: integer + description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low. + default: 10000 + - name: "--skip_trimming" + type: boolean + description: Skip the adapter trimming step. + default: false + - name: "--save_trimmed" + type: boolean + description: Save the trimmed FastQ files in the results directory. + default: false + + - name: "Read filtering options" + arguments: + - name: "--skip_bbsplit" + type: boolean_true + description: Skip BBSplit for removal of non-reference genome reads. + - name: "--remove_ribo_rna" + type: boolean_true + description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA. + + - name: "Output" + arguments: + - name: "--qc_output1" + type: file + direction: output + required: false + must_exist: false + description: Path to output directory + default: ${id}_r1.fastq.gz + - name: "--qc_output2" + type: file + direction: output + required: false + must_exist: false + description: Path to output directory + default: ${id}_r2.fastq.gz + - name: "--fastqc_html_1" + type: file + direction: output + description: FastQC HTML report for read 1. + required: false + must_exist: false + default: ${id}_r1.fastqc.html + - name: "--fastqc_html_2" + type: file + direction: output + description: FastQC HTML report for read 2. + required: false + must_exist: false + default: ${id}_r2.fastqc.html + - name: "--fastqc_zip_1" + type: file + direction: output + description: FastQC report archive for read 1. + required: false + must_exist: false + default: ${id}_r1.fastqc.zip + - name: "--fastqc_zip_2" + type: file + direction: output + description: FastQC report archive for read 2. + required: false + must_exist: false + default: ${id}_r2.fastqc.zip + - name: "--trim_log_1" + type: file + direction: output + required: false + must_exist: false + default: ${id}_r1.trimming_report.txt + - name: "--trim_log_2" + type: file + direction: output + required: false + must_exist: false + default: ${id}_r2.trimming_report.txt + - name: "--trim_html_1" + type: file + direction: output + required: false + must_exist: false + default: ${id}_r1.trimmed_fastqc.html + - name: "--trim_html_2" + type: file + direction: output + required: false + must_exist: false + default: ${id}_r2.trimmed_fastqc.html + - name: "--trim_zip_1" + type: file + direction: output + required: false + must_exist: false + default: ${id}_r1.trimmed_fastqc.zip + - name: "--trim_zip_2" + type: file + direction: output + required: false + must_exist: false + default: ${id}_r2.trimmed_fastqc.zip + - name: "--sortmerna_log" + type: file + direction: output + default: $id.sortmerna.log + required: false + must_exist: false + description: Sortmerna log file. + - name: "--salmon_quant_output" + type: file + direction: output + description: Results from Salmon quant + default: $id.salmon_quant_output + - name: --trim_json + type: file + description: The fastp json format report file name + default: $id.fastp_out.json + direction: output + - name: --trim_html + type: file + description: The fastp html format report file name + default: $id.fastp_out.html + direction: output + - name: --merged_out + type: file + description: File name to store merged fastp output. + direction: output +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: fastqc + repository: biobox + - name: umi_tools/umi_tools_extract + repository: biobox + - name: trimgalore + repository: biobox + - name: bbmap/bbmap_bbsplit + repository: biobox + - name: sortmerna + repository: biobox + - name: fastp + repository: biobox + - name: fq_subsample + repository: biobox + - name: salmon/salmon_quant + repository: biobox + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/pre_processing/main.nf b/src/workflows/pre_processing/main.nf new file mode 100644 index 0000000..c115390 --- /dev/null +++ b/src/workflows/pre_processing/main.nf @@ -0,0 +1,286 @@ +workflow run_wf { + + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [paired: paired, input: input] ] + } + + | fastqc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_fastqc }, + fromState: [ "input": "input" ], + toState: {id, output_state, state -> + def newKeys = [ + "fastqc_html_1":output_state["html"][0], + "fastqc_html_2": output_state["html"][1], + "fastqc_zip_1": output_state["zip"][0], + "fastqc_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + // Extract UMIs from fastq files and discard read 1 or read 2 if required + | umi_tools_extract.run ( + runIf: { id, state -> state.with_umi && !state.skip_umi_extract }, + fromState: { id, state -> + def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2) + def output = "${id}.r1.fastq.gz" + def read2_out = state.paired ? "${id}.r2.fastq.gz" : state.remove(state.fastq_2) + [ input: state.fastq_1, + read2_in: state.fastq_2, + bc_pattern: state.umitools_bc_pattern, + bc_pattern2: bc_pattern2, + extract_method: state.umitools_extract_method, + umi_separator: state.umitools_umi_separator, + grouping_method: state.umitools_grouping_method, + output: output, + read2_out: read2_out ] + }, + toState: [ + "fastq_1": "output", + "fastq_2": "read2_out" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Discard read if required + | map { id, state -> + def paired = state.paired + def fastq_1 = state.fastq_1 + def fastq_2 = state.fastq_2 + if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) { + if (state.umi_discard_read == 1) { + fastq_1 = fastq_2 + } + fastq_2 = state.remove(state.fastq_2) + paired = false + } + [ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ] + } + + // Trim reads using Trim galore! + | trimgalore.run ( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "trimgalore" }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ paired: state.paired, + input: input, + min_trimmed_reads: state.min_trimmed_reads, + trimmed_r1: state.qc_output1, + trimmed_r2: state.qc_output2 ] + }, + args: [gzip: true, fastqc: true], + toState: [ + "fastq_1": "trimmed_r1", + "fastq_2": "trimmed_r2", + "trim_log_1": "trimming_report_r1", + "trim_log_2": "trimming_report_r2", + "trim_zip_1": "trimmed_fastqc_zip_1", + "trim_zip_2": "trimmed_fastqc_zip_2", + "trim_html_1": "trimmed_fastqc_html_1", + "trim_html_2": "trimmed_fastqc_html_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Trim reads using fastp + | fastp.run( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, + fromState: { id, state -> + def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + [ in1: state.fastq_1, + in2: state.fastq_2, + merge: state.fastp_save_merged, + interleaved_in: state.interleaved_reads, + detect_adapter_for_pe: state.paired, + adapter_fasta: state.fastp_adapter_fasta ] + outputState + }, + toState: [ + "fastq_1": "out1", + "fastq_2": "out2", + "failed_trim": "failed_out", + "failed_trim_unpaired1": "unpaired1", + "failed_trim_unpaired2": "unpaired2", + "trim_json": "json", + "trim_html": "html", + "trim_merged_out": "merged_out" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Perform FASTQC on reads trimmed using fastp + | fastqc.run ( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ input: input ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trim_html_1":output_state["html"][0], + "trim_html_2": output_state["html"][1], + "trim_zip_1": output_state["zip"][0], + "trim_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], + key: "fastqc_trimming", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Filter out contaminant RNA + | bbmap_bbsplit.run ( + runIf: { id, state -> !state.skip_bbsplit }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ paired: state.paired, + input: input, + build: state.bbsplit_index ] + }, + args: ["only_build_index": false], + toState: [ + "fastq_1": "fastq_1", + "fastq_2": "fastq_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Sort reads by rRNA and non-rRNA + | sortmerna.run ( + runIf: { id, state -> state.remove_ribo_rna }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def filePaths = state.ribo_database_manifest.readLines() + def refs = filePaths.collect { it } + def other = "${id}_non_rRNA_reads/" + [ paired_in: state.paired, + input: input, + ref: refs, + out2: state.paired, + other: other ] + }, + args: [fastx: true, num_alignments: 1], + toState: { id, output_state, state -> + def newKeys = [ + "sortmerna_output": output_state["other"], + "sortmerna_log": output_state["log"] + ] + def new_state = state + newKeys + return new_state + }, + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | map { id, state -> + if (state.remove_ribo_rna) { + def fastq_1 = state.sortmerna_output.listFiles().find{it.name == "other_fwd.fq.gz"} + def fastq_2 = state.sortmerna_output.listFiles().find{it.name == "other_rev.fq.gz"} + [ id, state + [fastq_1: fastq_1, fastq_2: fastq_2] ] + } else { + [ id, state ] + } + } + + // Sub-sample FastQ files and pseudo-align with Salmon to auto-infer strandedness + | fq_subsample.run ( + runIf: { id, state -> state.strandedness == 'auto' }, + fromState: { id, state -> + def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + }, + args: [ + record_count: 1000, + seed: 1 + ], + toState: [ + "subsampled_fastq_1": "output_1", + "subsampled_fastq_2": "output_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + | salmon_quant.run ( + runIf: { id, state -> state.strandedness == 'auto' }, + fromState: { id, state -> + def unmated_reads = !state.paired ? state.subsampled_fastq_1 : null + def mates1 = state.paired ? state.subsampled_fastq_1 : null + def mates2 = state.paired ? state.subsampled_fastq_2 : null + [ unmated_reads: unmated_reads, + mates1: mates1, + mates2: mates2, + gene_map: state.gtf, + index: state.salmon_index, + lib_type: state.lib_type ] + }, + args: [ "skip_quant": true ], + toState: [ "salmon_quant_output": "output" ], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = (!state.paired) ? + [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : + [] + [ id, state + mod_state ] + } + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "qc_output1": "fastq_1", + "qc_output2": "fastq_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "sortmerna_log": "sortmerna_log", + "trim_json": "trim_json", + "trim_html": "trim_html", + "trim_merged_out": "trim_merged_out", + "salmon_quant_output": "salmon_quant_output" + ) + + emit: + output_ch +} diff --git a/src/workflows/pre_processing/test_run.sh b/src/workflows/pre_processing/test_run.sh new file mode 100755 index 0000000..14de080 --- /dev/null +++ b/src/workflows/pre_processing/test_run.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +viash ns build --parallel --setup cb #-q pre_processing + +echo "> Preparing reference data files" +gunzip --keep testData/minimal_test/reference/genes.gtf.gz +mkdir -p testData/minimal_test/reference/salmon_index +tar -C testData/minimal_test/reference/salmon_index --strip-components 1 -xavf testData/minimal_test/reference/salmon.tar.gz --no-same-owner + +# Test paired-end data +cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE +id,fastq_1,fastq_2,strandedness +WT_REP2,SRR6357072_1.fastq.gz,SRR6357072_2.fastq.gz,auto +RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse +HERE + +echo "> Test 1: Running workflow with trimgalore" +nextflow run target/nextflow/workflows/pre_processing/main.nf \ + --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ + --publish_dir "test_results/pre_processing_test1" \ + --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ + --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ + --gtf testData/minimal_test/reference/genes.gtf \ + --salmon_index testData/minimal_test/reference/salmon_index \ + --skip_trimming false \ + --trimmer trimgalore \ + --remove_ribo_rna true \ + --ribo_database_manifest testData/minimal_test/reference/rrna-db-defaults.txt \ + --skip_bbsplit true \ + --bbsplit_index test_results/prepare_genome_test1/BBSplit_index \ + --with_umi false \ + -profile docker \ + -resume + +# echo "> Test 2: Running workflow with fastp" +# nextflow run target/nextflow/workflows/pre_processing/main.nf \ +# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ +# --publish_dir "test_results/pre_processing_test2" \ +# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --gtf testData/minimal_test/reference/genes.gtf \ +# --salmon_index testData/minimal_test/reference/salmon_index \ +# --skip_trimming false \ +# --trimmer fastp \ +# --remove_ribo_rna false \ +# --ribo_database_manifest testData/minimal_test/reference/rrna-db-defaults.txt \ +# --skip_bbsplit false \ +# --bbsplit_index test_results/output_test1/BBSplit_index \ +# -profile docker \ +# -resume + +echo "Removing reference data files" +rm testData/minimal_test/reference/genes.gtf +rm -r testData/minimal_test/reference/salmon_index + +# TODO: Fix error while running sortmerna component +# docker: Error response from daemon: failed to create task for container: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: exec: "/bin/bash": stat /bin/bash: no such file or directory: unknown. \ No newline at end of file diff --git a/src/workflows/prepare_genome/config.vsh.yaml b/src/workflows/prepare_genome/config.vsh.yaml new file mode 100644 index 0000000..e47425f --- /dev/null +++ b/src/workflows/prepare_genome/config.vsh.yaml @@ -0,0 +1,168 @@ +name: prepare_genome +namespace: workflows +description: | + A subworkflow for preparing all the required genome references + +argument_groups: + - name: "Input" + arguments: + - name: "--fasta" + type: file + description: Path to FASTA genome file. + required: true + - name: "--gtf" + type: file + description: Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified. + required: false + - name: "--gff" + type: file + description: Path to GFF3 annotation file. Required if "--gtf" is not specified. + required: false + - name: "--additional_fasta" + type: file + description: FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences. + - name: "--transcript_fasta" + type: file + description: Path to FASTA transcriptome file. + - name: "--gene_bed" + type: file + description: Path to BED file containing gene intervals. This will be created from the GTF file if not specified. + - name: "--splicesites" + type: file + description: Splice sites file required for HISAT2. + - name: "--skip_bbsplit" + type: boolean + description: Skip BBSplit for removal of non-reference genome reads. + - name: "--bbsplit_fasta_list" + type: file + description: List of reference genomes (separated by ";") to filter reads against with BBSplit. + multiple: true + - name: "--star_index" + type: file + description: Path to directory or tar.gz archive for pre-built STAR index. + - name: --star_sjdb_gtf_feature_exon + type: string + description: Feature type in GTF file to be used as exons for building transcripts + - name: "--rsem_index" + type: file + description: Path to directory or tar.gz archive for pre-built RSEM index. + - name: "--salmon_index" + type: file + description: Path to directory or tar.gz archive for pre-built Salmon index. + - name: "--kallisto_index" + type: file + description: Path to directory or tar.gz archive for pre-built Kallisto index. + - name: "--bbsplit_index" + type: file + description: Path to directory or tar.gz archive for pre-built BBSplit index. + - name: "--pseudo_aligner_kmer_size" + type: integer + description: Kmer length passed to indexing step of pseudoaligners. + default: 31 + - name: "--gencode" + type: boolean + description: Specify if the GTF annotation is in GENCODE format. + - name: "--biotype" + type: string + description: Biotype value to use while appending entries to GTF file when additional fasta file is provided. + - name: "--filter_gtf" + type: boolean + description: Whether to filter the GTF or not? + - name: "--aligner" + type: string + description: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'. + choices: [star_salmon, star_rsem, hisat2] + default: "star_salmon" + - name: "--pseudo_aligner" + type: string + description: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'. + choices: [salmon, kallisto] + default: "salmon" + - name: "--skip_alignment" + type: boolean_true + description: Skip all of the alignment-based processes within the pipeline. + + - name: "Output" + arguments: + - name: "--fasta_uncompressed" + type: file + direction: output + default: reference_genome.fasta + - name: "--gtf_uncompressed" + type: file + direction: output + default: gene_annotation.gtf + - name: "--transcript_fasta_uncompressed" + type: file + direction: output + default: transcriptome.fasta + - name: "--gene_bed_uncompressed" + type: file + direction: output + default: gene_annotation.bed + - name: "--star_index_uncompressed" + type: file + direction: output + description: Path to STAR index. + default: STAR_index + - name: "--rsem_index_uncompressed" + type: file + direction: output + description: Path to directory or tar.gz archive for pre-built RSEM index. + default: RSEM_index + - name: "--salmon_index_uncompressed" + type: file + direction: output + description: Path to Salmon index. + default: Salmon_index + - name: "--kallisto_index_uncompressed" + type: file + direction: output + description: Path to Kallisto index. + default: Kallisto_index + - name: "--bbsplit_index_uncompressed" + type: file + direction: output + description: Path to BBSplit index. + default: BBSplit_index + - name: "--chrom_sizes" + type: file + direction: output + description: File containing chromosome lengths + default: reference_genome.fasta.sizes + - name: "--fai" + type: file + description: FASTA index file + direction: output + default: reference_genome.fasta.fai + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: gunzip + - name: gffread + repository: biobox + - name: cat_additional_fasta + - name: gtf2bed + - name: preprocess_transcripts_fasta + - name: gtf_filter + - name: rsem/rsem_prepare_reference + repository: biobox + - name: getchromsizes + - name: untar + repository: craftbox + - name: star/star_genome_generate + repository: biobox + - name: bbmap/bbmap_bbsplit + repository: biobox + - name: salmon/salmon_index + repository: biobox + - name: kallisto/kallisto_index + repository: biobox + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/prepare_genome/main.nf b/src/workflows/prepare_genome/main.nf new file mode 100644 index 0000000..a06390a --- /dev/null +++ b/src/workflows/prepare_genome/main.nf @@ -0,0 +1,311 @@ +workflow run_wf { + + take: + input_ch + + main: + output_ch = input_ch + + // Uncompress fasta + | gunzip.run ( + fromState: [ "input": "fasta" ], + toState: [ "fasta": "output" ], + key: "gunzip_fasta", + args: [ output: "reference_genome.fasta" ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress gtf + | gunzip.run ( + runIf: {id, state -> state.gtf}, + fromState: [ "input": "gtf" ], + toState: [ "gtf": "output" ], + key: "gunzip_gtf", + args: [output: "gene_annotation.gtf"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress gff + | gunzip.run ( + runIf: {id, state -> !state.gtf && state.gff}, + fromState: [ "input": "gff" ], + toState: [ "gff": "output" ], + key: "gunzip_gff", + args: [output: "gene_annotation.gff"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // gff to gtf + | gffread.run ( + runIf: {id, state -> !state.gtf && state.gff}, + fromState: [ + "input": "gff", + "genome": "fasta" + ], + toState: [ "gtf": "outfile" ], + args: [ + outfile: "gene_annotation.gtf", + gtf_output: true, + keep_attrs: true, + keep_exon_attrs: true + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | gtf_filter.run( + runIf: {id, state -> state.gtf && state.filter_gtf}, + fromState: [ + "fasta": "fasta", + "gtf": "gtf" + ], + toState: [ "gtf": "filtered_gtf" ], + args: [filtered_gtf: "gene_annotation.gtf"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress additional fasta + | gunzip.run ( + runIf: {id, state -> state.additional_fasta}, + fromState: [ "input": "additional_fasta" ], + toState: [ "additional_fasta": "output" ], + key: "gunzip_additional_fasta", + args: [output: "additional.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // concatenate additional fasta + | cat_additional_fasta.run ( + runIf: {id, state -> state.additional_fasta}, + fromState: [ + "fasta": "fasta", + "gtf": "gtf", + "additional_fasta": "additional_fasta", + "biotype": "biotype" + ], + toState: [ + "fasta": "fasta_output", + "gtf": "gtf_output" + ], + args: [ + fasta_output: "genome_additional.fasta", + gtf_output: "genome_additional.gtf" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress bed file + | gunzip.run ( + runIf: {id, state -> state.gene_bed}, + fromState: [ "input": "gene_bed" ], + toState: [ "gene_bed": "output" ], + key: "gunzip_gene_bed", + args: [output: "genome_additional.bed"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // gtf to bed + | gtf2bed.run ( + runIf: { id, state -> !state.gene_bed}, + fromState: [ "gtf": "gtf" ], + toState: [ "gene_bed": "bed_output" ], + args: [bed_output: "genome_additional.bed"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress transcript fasta + | gunzip.run ( + runIf: {id, state -> state.transcript_fasta}, + fromState: [ "input": "transcript_fasta" ], + toState: [ "transcript_fasta": "output" ], + key: "transcript_fasta", + args: [output: "transcriptome.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // preprocess transcripts fasta if gtf is in gencode format + | preprocess_transcripts_fasta.run ( + runIf: {id, state -> state.transcript_fasta && state.gencode}, + fromState: [ "transcript_fasta": "transcript_fasta" ], + toState: [ "transcript_fasta": "output" ], + args: [output: "transcriptome.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // make transcript FASTA if not provided + | rsem_prepare_reference.run ( + runIf: {id, state -> !state.transcript_fasta}, + fromState: [ + "reference_fasta_files": "fasta", + "gtf": "gtf" + ], + toState: [ "make_transcript_fasta_output": "output" ], + key: "make_transcript_fasta", + args: [reference_name: "genome"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + | map { id, state -> + def transcript_fasta = (!state.transcript_fasta) ? + state.make_transcript_fasta_output.listFiles().find{it.name == "genome.transcripts.fa"} : + state.transcript_fasta + [ id, state + [transcript_fasta: transcript_fasta] ] + } + + // chromosome size and fai index + | getchromsizes.run ( + fromState: [ "fasta": "fasta" ], + toState: [ + "fai": "fai", + "sizes": "sizes" + ], + key: "chromsizes", + args: [ + fai: "genome_additional.fasta.fai", + sizes: "genome_additional.fasta.sizes" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // untar bbsplit index, if available + | untar.run ( + runIf: {id, state -> state.bbsplit_index}, + fromState: [ "input": "bbsplit_index" ], + toState: [ "bbsplit_index": "output" ], + key: "untar_bbsplit_index", + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | map { id, state -> + // Check if bbsplit_fasta_list is defined + def ref = (state.bbsplit_fasta_list) ? + [state.fasta] + state.bbsplit_fasta_list : + [state.fasta] + [id, state + [bbsplit_ref: ref] ] + } + + // create bbsplit index, if not already available + | bbmap_bbsplit.run ( + runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, + fromState: ["ref": "bbsplit_ref"], + toState: [ "bbsplit_index": "index" ], + args: [ + only_build_index: true, + index: "BBSplit_index" + ], + key: "generate_bbsplit_index" + ) + + // Uncompress STAR index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.star_index}, + fromState: [ "input": "star_index" ], + toState: [ "star_index": "output" ], + key: "untar_star_index", + args: [output: "STAR_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | star_genome_generate.run ( + runIf: {id, state -> !state.star_index && !state.skip_alignment}, + fromState: [ + "genome_fasta_files": "fasta", + "sjdb_gtf_file": "gtf", + "sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ "star_index": "index" ], + key: "generate_star_index", + args: [index: "STAR_index"], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + // Uncompress RSEM index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.rsem_index}, + fromState: [ "input": "rsem_index" ], + toState: [ "rsem_index": "output" ], + key: "untar_rsem_index", + args: [output: "RSEM_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | rsem_prepare_reference.run ( + runIf: {id, state -> !state.rsem_index && state.aligner == 'star_rsem'}, + fromState: [ + "reference_fasta_files": "fasta", + "gtf": "gtf" + ], + toState: [ "rsem_index": "output" ], + key: "generate_rsem_index", + args: [reference_name: "genome"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // TODO: Uncompress HISAT2 index or generate from scratch if required + + // Uncompress Salmon index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.salmon_index}, + fromState: [ "input": "salmon_index" ], + toState: [ "salmon_index": "output" ], + key: "untar_salmon_index", + args: [output: "Salmon_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | salmon_index.run ( + runIf: {id, state -> (state.aligner == 'star_salmon' || state.pseudo_aligner == "salmon") && !state.salmon_index}, + fromState: [ + "genome": "fasta", + "transcripts": "transcript_fasta", + "kmer_len": "pseudo_aligner_kmer_size", + "gencode": "gencode" + ], + toState: [ "salmon_index": "index" ], + key: "generate_salmon_index", + args: [index: "Salmon_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // Uncompress Kallisto index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.kallisto_index}, + fromState: [ "input": "kallisto_index" ], + toState: [ "kallisto_index": "output" ], + key: "untar_kallisto_index", + args: [output: "Kallisto_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | kallisto_index.run( + runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, + fromState: [ + "input": "transcript_fasta", + "kmer_size": "pseudo_aligner_kmer_size" + ], + toState: [ "kallisto_index": "index" ], + key: "generate_kallisto_index", + args: [index: "Kallisto_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "fasta_uncompressed": "fasta", + "gtf_uncompressed": "gtf", + "transcript_fasta_uncompressed": "transcript_fasta", + "gene_bed_uncompressed": "gene_bed", + "star_index_uncompressed": "star_index", + "salmon_index_uncompressed": "salmon_index", + "kallisto_index_uncompressed": "kallisto_index", + "bbsplit_index_uncompressed": "bbsplit_index", + "rsem_index_uncompressed": "rsem_index", + "chrom_sizes": "sizes", + "fai": "fai" + ) + + emit: + output_ch +} diff --git a/src/workflows/prepare_genome/test_run.sh b/src/workflows/prepare_genome/test_run.sh new file mode 100755 index 0000000..7a286c2 --- /dev/null +++ b/src/workflows/prepare_genome/test_run.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +viash ns build --setup cb --parallel + +# echo "Test 1: Annotation file format - GTF" +# nextflow run target/nextflow/workflows/prepare_genome/main.nf \ +# --id test1 \ +# --publish_dir "test_results/prepare_genome_test1" \ +# --fasta testData/minimal_test/reference/genome.fasta \ +# --gtf testData/minimal_test/reference/genes.gtf.gz \ +# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --genotype false \ +# --biotype gene_biotype \ +# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ +# --salmon_index testData/minimal_test/reference/salmon.tar.gz \ +# --rsem_index testData/minimal_test/reference/rsem.tar.gz \ +# -profile docker \ +# -resume + +# echo "Test 2: Annotation file format - GFF" +# nextflow run target/nextflow/workflows/prepare_genome/main.nf \ +# --id test2 \ +# --publish_dir "test_results/prepare_genome_test2" \ +# --fasta testData/minimal_test/reference/genome.fasta \ +# --gff testData/minimal_test/reference/genes.gff.gz \ +# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --genotype false \ +# --biotype gene_biotype \ +# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ +# --salmon_index testData/minimal_test/reference/salmon.tar.gz \ +# --rsem_index testData/minimal_test/reference/rsem.tar.gz \ +# -profile docker \ +# -resume + +echo "Test 3: Annotation file format - GTF; Generate indices; Generate transcripts fasta" +nextflow run target/nextflow/workflows/prepare_genome/main.nf \ + --id test3 \ + --publish_dir "test_results/prepare_genome_test3" \ + --fasta testData/minimal_test/reference/genome.fasta \ + --gtf testData/minimal_test/reference/genes.gtf.gz \ + --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ + --genotype false \ + --biotype gene_biotype \ + --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ + --pseudo_aligner kallisto \ + --aligner star_rsem \ + -profile docker \ + -resume diff --git a/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml b/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml new file mode 100644 index 0000000..8a51f4b --- /dev/null +++ b/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml @@ -0,0 +1,88 @@ +name: pseudo_alignment_and_quant +namespace: workflows +description: | + A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline. + +argument_groups: + - name: "Input" + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--fastq_1" + alternatives: [-i] + type: file + description: Path to the sample (or read 1 of paired end sample). + required: true + example: input.fastq.gz + - name: "--fastq_2" + type: file + required: false + description: Path to read 2 of the sample. + - name: "--strandedness" + type: string + required: false + description: Sample strand-specificity. Must be one of unstranded, forward, or reverse + choices: [forward, reverse, unstranded] + - name: "--gtf" + type: file + description: GTF file + - name: "--transcript_fasta" + type: file + description: Fasta file of the reference transcriptome. + - name: "--pseudo_aligner" + type: string + default: false + description: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'. + choices: [salmon, kallisto] + - name: "--salmon_index" + type: file + description: Salmon index + - name: "--kallisto_index" + type: file + description: Kallisto index + - name: "--lib_type" + type: string + description: Override library type inferred based on strandedness defined in meta object + default: '' + - name: "--kallisto_quant_fragment_length" + type: double + description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto. + - name: "--kallisto_quant_fragment_length_sd" + type: double + description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto. + + - name: "Output" + arguments: + - name: "--pseudo_multiqc" + type: file + direction: output + - name: "--quant_out_dir" + type: file + direction: output + default: $id.quant + - name: "--salmon_quant_results_file" + type: file + direction: output + default: $id.quant.sf + - name: "--kallisto_quant_results_file" + type: file + direction: output + default: $id.abundance.tsv + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: salmon/salmon_quant + repository: biobox + - name: kallisto/kallisto_quant + repository: biobox + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/pseudo_alignment_and_quant/main.nf b/src/workflows/pseudo_alignment_and_quant/main.nf new file mode 100644 index 0000000..0c57f2e --- /dev/null +++ b/src/workflows/pseudo_alignment_and_quant/main.nf @@ -0,0 +1,104 @@ +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired, input: input ] ] + } + + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + // Count reads from BAM alignments using Salmon + | salmon_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'salmon' }, + fromState: { id, state -> + def unmated_reads = !state.paired ? state.fastq_1 : null + def mates1 = state.paired ? state.fastq_1 : null + def mates2 = state.paired ? state.fastq_2 : null + [ unmated_reads: unmated_reads, + mates1: mates1, + mates2: mates2, + gene_map: state.gtf, + index: state.salmon_index, + lib_type: state.lib_type ] + }, + toState: [ + "quant_out_dir": "output", + "salmon_quant_results_file": "quant_results" + ], + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = (state.pseudo_aligner == 'salmon') ? state + [pseudo_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | kallisto_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, + fromState: { id, state -> + def fr_stranded = state.strandedness == 'forward' + def rf_stranded = state.strandedness == 'reverse' + [ + input: state.input, + index: state.kallisto_index, + fragment_length: state.kallisto_quant_fragment_length, + sd: state.kallisto_quant_fragment_length_sd, + single: !state.paired, + fr_stranded: fr_stranded, + rf_stranded: rf_stranded, + ] + }, + args: [log: "kallisto_quant.log"], + toState: { id, output_state, state -> + def neKeys = [ + "quant_out_dir": output_state["output_dir"], + "kallisto_quant_results_file": output_state["output_dir"] + "/abundance.tsv", + "pseudo_multiqc": output_state["log"] + ] + def new_state = state + newKeys + return new_state + }, + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ "pseudo_multiqc": "quant_results", + "quant_out_dir": "quant_out_dir", + "salmon_quant_results_file": "salmon_quant_results_file", + "kallisto_quant_results_file": "kallisto_quant_results_file" + ] + ) + + emit: + output_ch +} diff --git a/src/workflows/pseudo_alignment_and_quant/test_run.sh b/src/workflows/pseudo_alignment_and_quant/test_run.sh new file mode 100755 index 0000000..3beb721 --- /dev/null +++ b/src/workflows/pseudo_alignment_and_quant/test_run.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +viash ns build --setup cb --parallel #-q pseudo_alignment_and_quant + +# Split error message from standard output +# viash ns list > /dev/null + +echo "> Preparing reference data files" +gunzip --keep testData/minimal_test/reference/genes.gtf.gz +mkdir -p testData/minimal_test/reference/salmon_index +tar -C testData/minimal_test/reference/salmon_index --strip-components 1 -xavf testData/minimal_test/reference/salmon.tar.gz + +cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE +id,fastq_1,fastq_2,strandedness +WT_REP1,SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz,reverse +RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse +HERE + +# echo "> Test 1: Salmon qunatification" +# nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \ +# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ +# --publish_dir "test_results/pseudo_alignment_test1" \ +# --fasta testData/minimal_test/reference/genome.fasta \ +# --gtf testData/minimal_test/reference/genes.gtf.gz \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --salmon_index testData/minimal_test/reference/salmon_index \ +# --pseudo_aligner salmon \ +# -profile docker \ +# -resume + +echo "> Test 2: Kallisto qunatification" +nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \ + --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ + --publish_dir "test_results/pseudo_alignment_test2" \ + --fasta testData/minimal_test/reference/genome.fasta \ + --gtf testData/minimal_test/reference/genes.gtf.gz \ + --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ + --kallisto_index test_results/prepare_genome_test3/Kallisto_index \ + --pseudo_aligner kallisto \ + --kallisto_quant_fragment_length 101.0 \ + --kallisto_quant_fragment_length_sd 50.0 \ + -profile docker \ + -resume + +echo "Removing reference data files" +rm testData/minimal_test/reference/genes.gtf +rm -r testData/minimal_test/reference/salmon_index diff --git a/src/workflows/quality_control/config.vsh.yaml b/src/workflows/quality_control/config.vsh.yaml new file mode 100644 index 0000000..2c3aee2 --- /dev/null +++ b/src/workflows/quality_control/config.vsh.yaml @@ -0,0 +1,651 @@ +name: quality_control +namespace: workflows +description: | + A subworkflow for the final quality control stage of the nf-core/rnaseq pipeline. + +argument_groups: + - name: Input + arguments: + - name: "--id" + required: true + description: ID of the sample + type: string + example: test + - name: "--strandedness" + type: string + required: false + default: unstranded + description: Sample strand-specificity. Must be one of unstranded, forward, reverse + - name: "--paired" + type: boolean + description: Paired-end reads? + - name: "--genome_bam" + type: file + description: Input genome BAM file + - name: "--genome_bam_index" + type: file + description: Genome BAM index file + - name: "--gene_bed" + type: file + description: Path to BED file containing gene intervals. This will be created from the GTF file if not specified. + - name: "--gtf" + type: file + description: GTF file + - name: "--gtf_group_features" + type: string + default: 'gene_id' + description: Define the attribute type used to group features in the GTF file when running Salmon. + - name: "--gtf_extra_attributes" + type: string + default: 'gene_name' + description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon. + - name: "--quant_out_dir" + type: file + description: Directory containing Salmon quantification results. + - name: "--quant_results_file" + type: file + description: Salmon quantification file. + - name: "--pseudo_quant_out_dir" + type: file + description: Directory containing quantification results for pseudo alignment. + - name: "--pseudo_salmon_quant_results_file" + type: file + description: Quantification file from Salmon for pseudo alignment. + - name: "--pseudo_kallisto_quant_results_file" + type: file + description: Quantification file from Kallisto for pseudo alignment. + - name: "--aligner" + type: string + description: Method used for alognment and qqunatification. + - name: "--pseudo_aligner" + type: string + description: Method used for pseudo alignment and quantification. + - name: "--rsem_counts_gene" + type: file + description: Expression counts on gene level + - name: "--rsem_counts_transcripts" + type: file + description: Expression counts on transcript level + + - name: "--skip_qc" + type: boolean_true + - name: "--skip_biotype_qc" + type: boolean_true + - name: "--skip_align" + type: boolean_true + - name: "--skip_pseudo_align" + type: boolean_true + - name: "--skip_dupradar" + type: boolean_true + - name: "--skip_qualimap" + type: boolean_true + - name: "--skip_rseqc" + type: boolean_true + - name: "--skip_multiqc" + type: boolean_true + + + # Component specific options + # preseq + - name: "--skip_preseq" + type: boolean + default: false + - name: "--extra_preseq_args" + type: string + default: '-verbose -bam -seed 1' + + # featureCounts + - name: "--featurecounts_group_type" + type: string + description: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts. + default: gene_biotype + - name: "--featurecounts_feature_type" + type: string + description: By default, the pipeline assigns reads based on the 'exon' attribute within the GTF file. + default: exon + - name: "--gencode" + type: boolean + description: Specify if the GTF annotation is in GENCODE format. + - name: "--biotypes_header" + type: file + default: src/assets/multiqc/biotypes_header.txt + - name: "--biotype" + type: string + description: Biotype value to use while appending entries to GTF file when additional fasta file is provided. + + # RSeQC + - name: "--rseqc_modules" + type: string + multiple: true + multiple_sep: ";" + description: Specify the RSeQC modules to run_wf + default: bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication + choices: [ "bam_stat", "inner_distance", "infer_experiment", "junction_annotation", "junction_saturation", "read_distribution", "read_duplication", "tin" ] + - name: "--sample_size" + type: integer + required: false + default: 200000 + min: 1 + description: Numer of reads sampled from SAM/BAM file to infer experiment and calculate inner distance, default = 200000. + - name: "--lower_bound_size" + type: integer + required: false + default: -250 + description: Lower bound of inner distance (bp). This option is used for ploting histograme, default = -250. + - name: "--upper_bound_size" + type: integer + required: false + default: 250 + description: Upper bound of inner distance (bp). This option is used for ploting histograme, default = 250. + - name: "--step_size" + type: integer + required: false + default: 5 + description: Step size (bp) of histograme of inner distance. This option is used for plotting histogram, default = 5. + - name: "--map_qual" + type: integer + required: false + default: 30 + description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default = 30. + min: 0 + - name: "--min_intron" + type: integer + required: false + default: 50 + min: 1 + description: Minimum intron length (bp) to call a junction, default = 50. + - name: "--min_splice_read" + type: integer + required: false + default: 1 + min: 1 + description: Minimum number of supporting reads to call a junction, default = 1. + - name: "--sampling_percentile_lower_bound" + type: integer + required: false + default: 5 + description: Read sampling for junction saturation starts from this percentile, must be an integer between 0 and 100, default = 5. + min: 0 + max: 100 + - name: "--sampling_percentile_upper_bound" + type: integer + required: false + default: 100 + description: Read sampling for junction saturation ends at this percentile, must be an integer between 0 and 100, default = 5. + min: 0 + max: 100 + - name: "--sampling_percentile_step" + type: integer + required: false + default: 5 + description: Read sampling for junction saturation frequency in %. Smaller value means more sampling times. Must be an integer between 0 and 100, default = 5. + min: 0 + max: 100 + - name: "--read_count_upper_limit" + type: integer + required: false + default: 500 + description: Upper limit of reads' occurence to determine read duplication. Only used for plotting, default = 500 (times). + min: 1 + - name: "--minimum_coverage" + type: integer + required: false + default: 10 + min: 1 + description: Minimum number of reads mapped to a transcript to determin tin, default = 10. + - name: "--tin_sample_size" + type: integer + required: false + default: 100 + min: 1 + description: Number of equal-spaced nucleotide positions picked from mRNA. Note, if this number is larger than the length of mRNA (L), it will be halved until it's smaller than L (default = 100) + - name: "--subtract_background" + type: boolean_true + description: Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads. + + # Qualimap + - name: "--pr_bases" + type: integer + required: false + default: 100 + min: 1 + description: Number of upstream/downstream nucleotide bases to compute 5'-3' bias for qualimap (default = 100). + - name: "--tr_bias" + type: integer + required: false + default: 1000 + min: 1 + description: Number of top highly expressed transcripts to compute 5'-3' bias for qualimap (default = 1000). + - name: "--algorithm" + type: string + required: false + default: uniquely-mapped-reads + description: Counting algorithm for qualimap (uniquely-mapped-reads (default) or proportional). + - name: "--sequencing_protocol" + type: string + required: false + choices: [ "non-strand-specific", "strand-specific-reverse", "strand-specific-forward" ] + default: non-strand-specific + description: Sequencing library protocol for qualimap (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)). + - name: "--sorted" + type: boolean_true + description: Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed for qualimap. Only requiredfor paired-end analysis. + - name: "--java_memory_size" + type: string + required: false + default: 4G + description: maximum Java heap memory size for qualimap, default = 4G. + + # DESeq2 + - name: "--skip_deseq2_qc" + type: boolean + - name: "--deseq2_vst" + type: boolean + description: Use vst transformation instead of rlog with DESeq2 + + # MultiQC + - name: "--multiqc_custom_config" + type: file + description: | + Custom multiqc configuration file + - name: "--multiqc_title" + type: string + description: | + Custom multiqc title + - name: "--multiqc_methods_description" + type: file + - name: "--passed_trimmed_reads" + type: boolean + - name: "--num_trimmed_reads" + type: double + - name: "--passed_mapping" + type: boolean + - name: "--percent_mapped" + type: double + - name: "--fastqc_zip_1" + type: file + must_exist: false + - name: "--fastqc_zip_2" + type: file + must_exist: false + - name: "--trim_zip_1" + type: file + must_exist: false + - name: "--trim_zip_2" + type: file + must_exist: false + - name: "--trim_log_1" + type: file + must_exist: false + - name: "--trim_log_2" + type: file + must_exist: false + - name: "--sortmerna_multiqc" + type: file + must_exist: false + - name: "--star_multiqc" + type: file + must_exist: false + - name: "--rsem_multiqc" + type: file + - name: "--genome_bam_stats" + type: file + must_exist: false + - name: "--genome_bam_flagstat" + type: file + must_exist: false + - name: "--genome_bam_idxstats" + type: file + must_exist: false + - name: "--markduplicates_multiqc" + type: file + must_exist: false + - name: "--pseudo_multiqc" + type: file + must_exist: false + + - name: Output + arguments: + # preseq + - name: "--preseq_output" + type: file + direction: output + default: $id.lc_extrap.txt + + # RSeQC + - name: "--bamstat_output" + type: file + direction: output + required: false + description: Path to output file (txt) of mapping quality statistics + default: $id.mapping_quality.txt + - name: "--strandedness_output" + type: file + direction: output + required: false + default: $id.strandedness.txt + description: Path to output report (txt) of inferred strandedness + - name: "--inner_dist_output_stats" + type: file + direction: output + must_exist: false + default: $id.inner_distance.stats + description: output file (txt) with summary statistics of inner distances of paired reads + - name: "--inner_dist_output_dist" + type: file + direction: output + must_exist: false + default: $id.inner_distance.txt + description: output file (txt) with inner distances of all paired reads + - name: "--inner_dist_output_freq" + type: file + direction: output + must_exist: false + default: $id.inner_distance_freq.txt + description: output file (txt) with frequencies of inner distances of all paired reads + - name: "--inner_dist_output_plot" + type: file + direction: output + must_exist: false + default: $id.inner_distance_plot.pdf + description: output file (pdf) with histogram plot of of inner distances of all paired reads + - name: "--inner_dist_output_plot_r" + type: file + direction: output + must_exist: false + default: $id.inner_distance_plot.r + description: output file (R) with script of histogram plot of of inner distances of all paired reads + - name: "--junction_annotation_output_log" + type: file + direction: output + required: false + default: $id.junction_annotation.log + description: output log of junction annotation script + - name: "--junction_annotation_output_plot_r" + type: file + direction: output + required: false + default: $id.junction_annotation_plot.r + description: R script to generate splice_junction and splice_events plot + - name: "--junction_annotation_output_junction_bed" + type: file + direction: output + required: false + default: $id.junction_annotation.bed + description: junction annotation file (bed format) + - name: "--junction_annotation_output_junction_interact" + type: file + direction: output + required: false + default: $id.junction_annotation.Interact.bed + description: interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization. + - name: "--junction_annotation_output_junction_sheet" + type: file + direction: output + required: false + default: $id.junction_annotation.xls + description: junction annotation file (xls format) + - name: "--junction_annotation_output_splice_events_plot" + type: file + direction: output + required: false + default: $id.splice_events.pdf + description: plot of splice events (pdf) + - name: "--junction_annotation_output_splice_junctions_plot" + type: file + direction: output + required: false + default: $id.splice_junctions_plot.pdf + description: plot of junctions (pdf) + - name: "--junction_saturation_output_plot_r" + type: file + direction: output + required: false + default: $id.junction_saturation_plot.r + description: r script to generate junction_saturation_plot plot + - name: "--junction_saturation_output_plot" + type: file + direction: output + required: false + default: $id.junction_saturation_plot.pdf + description: plot of junction saturation (pdf + - name: "--read_distribution_output" + type: file + direction: output + required: false + default: $id.read_distribution.txt + description: output file (txt) of read distribution analysis. + - name: "--read_duplication_output_duplication_rate_plot_r" + type: file + direction: output + required: false + default: $id.duplication_rate_plot.r + description: R script for generating duplication rate plot + - name: "--read_duplication_output_duplication_rate_plot" + type: file + direction: output + required: false + default: $id.duplication_rate_plot.pdf + description: duplication rate plot (pdf) + - name: "--read_duplication_output_duplication_rate_mapping" + type: file + direction: output + required: false + default: $id.duplication_rate_mapping.xls + description: Summary of mapping-based read duplication + - name: "--read_duplication_output_duplication_rate_sequence" + type: file + direction: output + required: false + default: $id.duplication_rate_sequencing.xls + description: Summary of sequencing-based read duplication + - name: "--tin_output_summary" + type: file + direction: output + required: false + default: $id.tin_summary.txt + description: summary statistics (txt) of calculated TIN metrics + - name: "--tin_output_metrics" + type: file + direction: output + required: false + default: $id.tin.xls + description: file with TIN metrics (xls) + + # dupRadar + - name: "--dupradar_output_dupmatrix" + type: file + direction: output + required: false + default: $id.dup_matrix.txt + description: path to output file (txt) of duplicate tag counts + - name: "--dupradar_output_dup_intercept_mqc" + type: file + direction: output + required: false + default: $id.dup_intercept_mqc.txt + description: path to output file (txt) of multiqc intercept value DupRadar + - name: "--dupradar_output_duprate_exp_boxplot" + type: file + direction: output + required: false + default: $id.duprate_exp_boxplot.pdf + description: path to output file (pdf) of distribution of expression box plot + - name: "--dupradar_output_duprate_exp_densplot" + type: file + direction: output + required: false + default: $id.duprate_exp_densityplot.pdf + description: path to output file (pdf) of 2D density scatter plot of duplicate tag counts + - name: "--dupradar_output_duprate_exp_denscurve_mqc" + type: file + direction: output + required: false + default: $id.duprate_exp_density_curve_mqc.pdf + description: path to output file (pdf) of density curve of gene duplication multiqc + - name: "--dupradar_output_expression_histogram" + type: file + direction: output + required: false + default: $id.expression_hist.pdf + description: path to output file (pdf) of distribution of RPK values per gene histogram + - name: "--dupradar_output_intercept_slope" + type: file + direction: output + required: false + default: $id.intercept_slope.txt + + # Qualimap + - name: "--qualimap_qc_report" + direction: output + type: file + example: $id.rnaseq_qc_results.txt + description: Text file containing the RNAseq QC results. + - name: "--qualimap_counts" + type: file + direction: output + description: Output file for computed counts. + - name: "--qualimap_report" + type: file + direction: output + example: $id.report.html + description: Report output file. Supported formats are PDF or HTML. + + # DESeq2 + - name: "--deseq2_output" + type: file + direction: output + default: deseq2 + - name: "--deseq2_output_pseudo" + type: file + direction: output + default: deseq2_pseudo + + # MultiQC + - name: "--multiqc_report" + type: file + direction: output + default: multiqc_report.html + - name: "--multiqc_data" + type: file + direction: output + default: multiqc_data + - name: "--multiqc_plots" + type: file + direction: output + default: multiqc_plots + + # Biotype QC + - name: "--featurecounts" + type: file + direction: output + must_exist: false + default: $id.featureCounts.txt + - name: "--featurecounts_summary" + type: file + direction: output + must_exist: false + default: $id.featureCounts.txt.summary + - name: "--featurecounts_multiqc" + type: file + direction: output + must_exist: false + default: $id.featureCounts_mqc.tsv + - name: "--featurecounts_rrna_multiqc" + type: file + direction: output + must_exist: false + default: $id.featureCounts_rrna_mqc.tsv + + # Counts + - name: "--tpm_gene" + type: file + direction: output + default: salmon.merged.gene_tpm.tsv + - name: "--counts_gene" + type: file + direction: output + default: salmon.merged.gene_counts.tsv + - name: "--counts_gene_length_scaled" + type: file + direction: output + default: salmon.merged.gene_counts_length_scaled.tsv + - name: "--counts_gene_scaled" + type: file + direction: output + default: salmon.merged.gene_counts_scaled.tsv + - name: "--tpm_transcript" + type: file + direction: output + default: salmon.merged.transcript_tpm.tsv + - name: "--counts_transcript" + type: file + direction: output + default: salmon.merged.transcript_counts.tsv + - name: "--quant_merged_summarizedexperiment" + type: file + direction: output + default: salmon_merged_summarizedexperiment + - name: "--pseudo_tpm_gene" + type: file + direction: output + default: pseudo_gene_tpm.tsv + - name: "--pseudo_counts_gene" + type: file + direction: output + default: pseudo_gene_counts.tsv + - name: "--pseudo_counts_gene_length_scaled" + type: file + direction: output + default: pseudo_gene_counts_length_scaled.tsv + - name: "--pseudo_counts_gene_scaled" + type: file + direction: output + default: pseudo_gene_counts_scaled.tsv + - name: "--pseudo_tpm_transcript" + type: file + direction: output + default: pseudo_transcript_tpm.tsv + - name: "--pseudo_counts_transcript" + type: file + direction: output + default: pseudo_transcript_counts.tsv + - name: "--pseudo_quant_merged_summarizedexperiment" + type: file + direction: output + default: pseudo_quant_merged_summarizedexperiment + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: rseqc/rseqc_bamstat + repository: biobox + - name: rseqc/rseqc_inferexperiment + repository: biobox + - name: rseqc/rseqc_inner_distance + repository: biobox + - name: rseqc/rseqc_junctionannotation + - name: rseqc/rseqc_junctionsaturation + - name: rseqc/rseqc_readdistribution + - name: rseqc/rseqc_readduplication + - name: rseqc/rseqc_tin + - name: dupradar + - name: qualimap/qualimap_rnaseq + repository: biobox + - name: preseq_lcextrap + - name: featurecounts + repository: biobox + - name: multiqc_custom_biotype + - name: deseq2_qc + - name: prepare_multiqc_input + - name: multiqc + repository: biobox + - name: rsem_merge_counts + - name: workflows/merge_quant_results + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/quality_control/main.nf b/src/workflows/quality_control/main.nf new file mode 100644 index 0000000..4b2ecd2 --- /dev/null +++ b/src/workflows/quality_control/main.nf @@ -0,0 +1,825 @@ +workflow run_wf { + + take: + input_ch + + main: + + qc_ch = input_ch + + // temporary fix to force assignment when alignment is skipped + | map {it} + + // Feature biotype QC using featureCounts + | map { id, state -> + def biotype_in_gtf = biotypeInGtf(state.gtf, state.biotype) + def attribute_type = state.gencode ? "gene_type" : state.featurecounts_group_type + def strand = (state.strandedness == "forward") ? 1 : ((state.strandedness == "reverse") ? 2 : 0) + [ id, state + [biotype_in_gtf: biotype_in_gtf, attribute_type: attribute_type, strand: strand] ] + } + + | featurecounts.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.biotype_in_gtf && !state.skip_align }, + fromState: [ + "paired": "paired", + "strand": "strand", + "annotation": "gtf", + "input": "genome_bam", + "attribute_type": "attribute_type", + "feature_type": "featurecounts_feature_type", + "count_read_pairs": "paired" + ], + toState: [ + "featurecounts": "counts", + "featurecounts_summary": "summary" + ], + args: [ + both_aligned: true, + same_strand: true + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | multiqc_custom_biotype.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, + fromState: [ + "id": "id", + "biocounts": "featurecounts", + "biotypes_header": "biotypes_header" + ], + toState: [ + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | preseq_lcextrap.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, + fromState: [ + "paired": "paired", + "input": "genome_bam", + "extra_preseq_args": "extra_preseq_args" + ], + toState: [ "preseq_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | rseqc_bamstat.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "mapq": "map_qual" + ], + toState: [ "bamstat_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_inferexperiment.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual" + ], + toState: [ "strandedness_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + // Get predicted strandedness from the RSeQC infer_experiment.py output + | map { id, state -> + def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) + def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true + [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] + } + | rseqc_inner_distance.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, + key: "inner_distance", + args: ["output_prefix": "output"], + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual", + "lower_bound": "lower_bound_size", + "upper_bound": "upper_bound_size", + "step": "step_size", + ], + toState: [ + "inner_dist_output_stats": "output_stats", + "inner_dist_output_dist": "output_dist", + "inner_dist_output_freq": "output_freq", + "inner_dist_output_plot": "output_plot", + "inner_dist_output_plot_r": "output_plot_r" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_junctionannotation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "map_qual": "map_qual", + "min_intron": "min_intron" + ], + toState: [ + "junction_annotation_output_log": "output_log", + "junction_annotation_output_plot_r": "output_plot_r", + "junction_annotation_output_junction_bed": "output_junction_bed", + "junction_annotation_output_junction_interact": "output_junction_interact", + "junction_annotation_output_junction_sheet": "output_junction_sheet", + "junction_annotation_output_splice_events_plot": "output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_junctionsaturation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", + "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", + "sampling_percentile_step": "sampling_percentile_step", + "min_intron": "min_intron", + "min_splice_read": "min_splice_read", + "map_qual": "map_qual" + ], + toState: [ + "junction_saturation_output_plot_r": "output_plot_r", + "junction_saturation_output_plot": "output_plot" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_readdistribution.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + ], + toState: [ "read_distribution_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_readduplication.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "read_count_upper_limit": "read_count_upper_limit", + "map_qual": "map_qual" + ], + toState: [ + "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_tin.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "bam_input": "genome_bam", + "bai_input": "genome_bam_index", + "refgene": "gene_bed", + "minimum_coverage": "minimum_coverage", + "sample_size": "tin_sample_size", + "subtract_background": "subtract_background" + ], + toState: [ + "tin_output_summary": "output_tin_summary", + "tin_output_metrics": "output_tin" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | dupradar.run( + runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align }, + fromState: [ + "id": "id", + "input": "genome_bam", + "gtf_annotation": "gtf", + "paired": "paired", + "strandedness": "strandedness" + ], + toState: [ + "dupradar_output_dupmatrix": "output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "output_expression_histogram", + "dupradar_output_intercept_slope": "output_intercept_slope" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // TODO: Add outdir as an output argument to the qualimap module on biobox. + // Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost. + | qualimap_rnaseq.run( + runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align }, + fromState: [ + "bam": "genome_bam", + "gtf": "gtf", + "num_pr_bases": "pr_bases", + "num_tr_bias": "tr_bias", + "algorithm": "algorithm", + "sequencing_protocol": "sequencing_protocol", + "sorted": "sorted", + "java_memory_size": "java_memory_size", + ], + toState: [ + "qualimap_report": "report", + "qualimap_qc_report": "qc_report", + "qualimap_counts": "counts" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + merged_ch = qc_ch + | toSortedList + | map { list -> + def ids = list.collect { id, state -> id } + def strandedness = list.collect { id, state -> state.strandedness } + def num_trimmed_reads = list.collect { id, state -> state.num_trimmed_reads } + def passed_trimmed_reads = list.collect { id, state -> state.passed_trimmed_reads } + def passed_mapping = list.collect { id, state -> state.passed_mapping } + def percent_mapped = list.collect { id, state -> state.percent_mapped } + def inferred_strand = list.collect { id, state -> state.inferred_strand } + def passed_strand_check = list.collect { id, state -> state.passed_strand_check } + def gtf = list.collect { id, state -> state.gtf }.unique()[0] + def gtf_extra_attributes = list.collect { id, state -> state.gtf_extra_attributes }.unique()[0] + def gtf_group_features = list.collect { id, state -> state.gtf_group_features }.unique()[0] + def pca_header_multiqc = list.collect { id, state -> state.pca_header_multiqc }.unique()[0] + def clustering_header_multiqc = list.collect { id, state -> state.clustering_header_multiqc }.unique()[0] + def aligner = list.collect { id, state -> state.aligner }.unique()[0] + def pseudo_aligner = list.collect { id, state -> state.pseudo_aligner }.unique()[0] + def deseq2_vst = list.collect { id, state -> state.deseq2_vst }.unique()[0] + def extra_deseq2_args = list.collect { id, state -> state.extra_deseq2_args }.unique()[0] + def extra_deseq2_args2 = list.collect { id, state -> state.extra_deseq2_args2 }.unique()[0] + def skip_deseq2_qc = list.collect { id, state -> state.skip_deseq2_qc }.unique()[0] + def skip_qc = list.collect { id, state -> state.skip_qc }.unique()[0] + def skip_align = list.collect { id, state -> state.skip_align }.unique()[0] + def skip_pseudo_align = list.collect { id, state -> state.skip_pseudo_align }.unique()[0] + def quant_results = list.collect { id, state -> + (state.quant_results_file instanceof java.nio.file.Path && state.quant_results_file.exists()) ? + state.quant_results_file : + null } + def rsem_counts_gene = list.collect { id, state -> + (state.rsem_counts_gene instanceof java.nio.file.Path && state.rsem_counts_gene.exists()) ? + state.rsem_counts_gene : + null } + def rsem_counts_transcripts = list.collect { id, state -> + (state.rsem_counts_transcripts instanceof java.nio.file.Path && state.rsem_counts_transcripts.exists()) ? + state.rsem_counts_transcripts : + null } + def pseudo_quant_out_dir = list.collect { id, state -> + (state.pseudo_quant_out_dir instanceof java.nio.file.Path && state.pseudo_quant_out_dir.exists()) ? + state.pseudo_quant_out_dir : + null } + def pseudo_salmon_quant_results = list.collect { id, state -> + (state.pseudo_salmon_quant_results_file instanceof java.nio.file.Path && state.pseudo_salmon_quant_results_file.exists()) ? + state.pseudo_salmon_quant_results_file : + null } + def pseudo_kallisto_quant_results = list.collect { id, state -> + (state.pseudo_kallisto_quant_results_file instanceof java.nio.file.Path && state.pseudo_kallisto_quant_results_file.exists()) ? + state.pseudo_kallisto_quant_results_file : + null } + def fastqc_zip_1 = list.collect { id, state -> + (state.fastqc_zip_1 instanceof java.nio.file.Path && state.fastqc_zip_1.exists()) ? + state.fastqc_zip_1 : + null } + def fastqc_zip_2 = list.collect { id, state -> + (state.fastqc_zip_2 instanceof java.nio.file.Path && state.fastqc_zip_2.exists()) ? + state.fastqc_zip_2 : + null } + def trim_zip_1 = list.collect { id, state -> + (state.trim_zip_1 instanceof java.nio.file.Path && state.trim_zip_1.exists()) ? + state.trim_zip_1 : + null } + def trim_zip_2 = list.collect { id, state -> + (state.trim_zip_2 instanceof java.nio.file.Path && state.trim_zip_2.exists()) ? + state.trim_zip_2 : + null } + def trim_log_1 = list.collect { id, state -> + (state.trim_log_1 instanceof java.nio.file.Path && state.trim_log_1.exists()) ? + state.trim_log_1 : + null } + def trim_log_2 = list.collect { id, state -> + (state.trim_log_2 instanceof java.nio.file.Path && state.trim_log_2.exists()) ? + state.trim_log_2 : + null } + def sortmerna_multiqc = list.collect { id, state -> + (state.sortmerna_multiqc instanceof java.nio.file.Path && state.sortmerna_multiqc.exists()) ? + state.sortmerna_multiqc : + null } + def star_multiqc = list.collect { id, state -> + (state.star_multiqc instanceof java.nio.file.Path && state.star_multiqc.exists()) ? + state.star_multiqc : + null } + def genome_bam_stats = list.collect { id, state -> + (state.genome_bam_stats instanceof java.nio.file.Path && state.genome_bam_stats.exists()) ? + state.genome_bam_stats : + null } + def genome_bam_flagstat = list.collect { id, state -> + (state.genome_bam_flagstat instanceof java.nio.file.Path && state.genome_bam_flagstat.exists()) ? + state.genome_bam_flagstat : + null } + def genome_bam_idxstats = list.collect { id, state -> + (state.genome_bam_idxstats instanceof java.nio.file.Path && state.genome_bam_idxstats.exists()) ? + state.genome_bam_idxstats : + null } + def markduplicates_multiqc = list.collect { id, state -> + (state.markduplicates_multiqc instanceof java.nio.file.Path && state.markduplicates_multiqc.exists()) ? + state.markduplicates_multiqc : + null } + def salmon_multiqc = list.collect { id, state -> + (state.salmon_multiqc instanceof java.nio.file.Path && state.salmon_multiqc.exists()) ? + state.salmon_multiqc : + null } + def rsem_multiqc = list.collect { id, state -> + (state.rsem_multiqc instanceof java.nio.file.Path && state.rsem_multiqc.exists()) ? + state.rsem_multiqc : + null } + def pseudo_multiqc = list.collect { id, state -> + (state.pseudo_multiqc instanceof java.nio.file.Path && state.pseudo_multiqc.exists()) ? + state.pseudo_multiqc : + null } + def featurecounts_multiqc = list.collect { id, state -> + (state.featurecounts_multiqc instanceof java.nio.file.Path && state.featurecounts_multiqc.exists()) ? + state.featurecounts_multiqc : + null } + def featurecounts_rrna_multiqc = list.collect { id, state -> + (state.featurecounts_rrna_multiqc instanceof java.nio.file.Path && state.featurecounts_rrna_multiqc.exists()) ? + state.featurecounts_rrna_multiqc : + null } + def preseq_output = list.collect { id, state -> + (state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ? + state.preseq_output : + null } + // def qualimap_output_dir = list.collect { id, state -> + // (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? + // state.qualimap_output_dir : + // null } + def dupradar_output_dup_intercept_mqc = list.collect { id, state -> + (state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ? + state.dupradar_output_dup_intercept_mqc : + null } + def dupradar_output_duprate_exp_denscurve_mqc = list.collect { id, state -> + (state.dupradar_output_duprate_exp_denscurve_mqc instanceof java.nio.file.Path && state.dupradar_output_duprate_exp_denscurve_mqc.exists()) ? + state.dupradar_output_duprate_exp_denscurve_mqc : + null } + def bamstat_output = list.collect { id, state -> + (state.bamstat_output instanceof java.nio.file.Path && state.bamstat_output.exists()) ? + state.bamstat_output : + null } + def inferexperiment_multiqc = list.collect { id, state -> + (state.strandedness_output instanceof java.nio.file.Path && state.strandedness_output.exists()) ? + state.strandedness_output : + null } + def inner_dist_output_freq = list.collect { id, state -> + (state.inner_dist_output_freq instanceof java.nio.file.Path && state.inner_dist_output_freq.exists()) ? + state.inner_dist_output_freq : + null } + def junction_annotation_output_log = list.collect { id, state -> + (state.junction_annotation_output_log instanceof java.nio.file.Path && state.junction_annotation_output_log.exists()) ? + state.junction_annotation_output_log : + null } + def junction_saturation_output_plot_r = list.collect { id, state -> + (state.junction_saturation_output_plot_r instanceof java.nio.file.Path && state.junction_saturation_output_plot_r.exists()) ? + state.junction_saturation_output_plot_r : + null } + def read_distribution_output = list.collect { id, state -> + (state.read_distribution_output instanceof java.nio.file.Path && state.read_distribution_output.exists()) ? + state.read_distribution_output : + null } + def read_duplication_output_duplication_rate_mapping = list.collect { id, state -> + (state.read_duplication_output_duplication_rate_mapping instanceof java.nio.file.Path && state.read_duplication_output_duplication_rate_mapping.exists()) ? + state.read_duplication_output_duplication_rate_mapping : + null } + def tin_output_summary = list.collect { id, state -> + (state.tin_output_summary instanceof java.nio.file.Path && state.tin_output_summary.exists()) ? + state.tin_output_summary : + null } + def multiqc_custom_config = list.collect { id, state -> state.multiqc_custom_config }.unique()[0] + + ["merged", + [ + ids: ids, + strandedness: strandedness, + num_trimmed_reads: num_trimmed_reads, + passed_trimmed_reads: passed_trimmed_reads, + passed_mapping: passed_mapping, + percent_mapped: percent_mapped, + inferred_strand: inferred_strand, + passed_strand_check: passed_strand_check, + skip_align: skip_align, + skip_pseudo_align: skip_pseudo_align, + quant_results: quant_results.findAll { it != null }, + rsem_counts_gene: rsem_counts_gene.findAll { it != null }, + rsem_counts_transcripts: rsem_counts_transcripts.findAll { it != null }, + pseudo_quant_out_dir: pseudo_quant_out_dir.findAll { it != null }, + pseudo_salmon_quant_results: pseudo_salmon_quant_results.findAll { it != null }, + pseudo_kallisto_quant_results: pseudo_kallisto_quant_results.findAll { it != null }, + gtf: gtf, + gtf_extra_attributes: gtf_extra_attributes, + gtf_group_features: gtf_group_features, + pca_header_multiqc: pca_header_multiqc, + clustering_header_multiqc: clustering_header_multiqc, + aligner: aligner, + pseudo_aligner: pseudo_aligner, + deseq2_vst: deseq2_vst, + extra_deseq2_args: extra_deseq2_args, + extra_deseq2_args2: extra_deseq2_args2, + skip_deseq2_qc: skip_deseq2_qc, + fastqc_zip: fastqc_zip_1 + fastqc_zip_2, + trim_zip: trim_zip_1 + trim_zip_2, + trim_log: trim_log_1 + trim_log_2, + sortmerna_multiqc: sortmerna_multiqc, + star_multiqc: star_multiqc, + genome_bam_stats: genome_bam_stats, + genome_bam_flagstat: genome_bam_flagstat, + genome_bam_idxstats: genome_bam_idxstats, + markduplicates_multiqc: markduplicates_multiqc, + salmon_multiqc: salmon_multiqc, + rsem_multiqc: rsem_multiqc, + pseudo_multiqc: pseudo_multiqc, + featurecounts_multiqc: featurecounts_multiqc, + featurecounts_rrna_multiqc: featurecounts_rrna_multiqc, + preseq_output: preseq_output, + // qualimap_output_dir: qualimap_output_dir, + dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc, + dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc, + bamstat_output: bamstat_output, + inner_dist_output_freq: inner_dist_output_freq, + inferexperiment_multiqc: inferexperiment_multiqc, + junction_annotation_output_log: junction_annotation_output_log, + junction_saturation_output_plot_r: junction_saturation_output_plot_r, + read_distribution_output: read_distribution_output, + read_duplication_output_duplication_rate_mapping: read_duplication_output_duplication_rate_mapping, + tin_output_summary: tin_output_summary, + multiqc_custom_config: multiqc_custom_config + ] + ] + } + + // Merge quantification results of alignment + | merge_quant_results.run ( + runIf: { id, state -> !state.skip_align && state.aligner == 'star_salmon' }, + fromState: [ + "salmon_quant_results": "quant_results", + "gtf": "gtf", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf_group_features": "gtf_group_features" + ], + args: [ quant_type: "salmon"], + toState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "lengths_gene": "lengths_gene", + "lengths_transcript": "lengths_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" + ], + key: "merge_quant_results", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | rsem_merge_counts.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts" + ], + toState: [ + "tpm_gene": "merged_gene_tpm", + "counts_gene": "merged_gene_counts", + "tpm_transcript": "merged_transcript_tpm", + "counts_transcript": "merged_transcript_counts" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | deseq2_qc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_align }, + fromState: { id, state -> + def counts = (state.aligner == "star_rsem") ? state.counts_gene : state.counts_gene_length_scaled + [ + counts: counts, + vst: state.deseq2_vst, + label: state.aligner + ] + }, + args: [count_col: 3, id_col: 1, outprefix: "deseq2"], + toState: [ + "deseq2_output": "outdir", + "deseq2_pca_multiqc": "pca_multiqc", + "deseq2_dists_multiqc": "dists_multiqc" + ], + key: "deseq2_qc_align_quant", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Merge quantification results of pseudo alignment + | merge_quant_results.run ( + runIf: { id, state -> !state.skip_pseudo_align }, + fromState: [ + "salmon_quant_results": "pseudo_salmon_quant_results", + "kallisto_quant_results": "pseudo_kallisto_quant_results", + "gtf": "gtf", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf_group_features": "gtf_group_features", + "quant_type": "pseudo_aligner" + ], + toState: [ + "pseudo_tpm_gene": "tpm_gene", + "pseudo_counts_gene": "counts_gene", + "pseudo_counts_gene_length_scaled": "counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "counts_gene_scaled", + "pseudo_tpm_transcript": "tpm_transcript", + "pseudo_counts_transcript": "counts_transcript", + "pseudo_lengths_gene": "lengths_gene", + "pseudo_lengths_transcript": "lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" + ], + key: "merge_pseudo_quant_results", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | deseq2_qc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_pseudo_align }, + fromState: [ + "counts": "pseudo_counts_gene_length_scaled", + "vst": "deseq2_vst", + "label": "pseudo_aligner" + ], + args: [count_col: 3, id_col: 1, outprefix: "deseq2"], + toState: [ + "deseq2_output_pseudo": "outdir", + "deseq2_pca_multiqc_pseudo": "pca_multiqc", + "deseq2_dists_multiqc_pseudo": "dists_multiqc" + ], + key: "deseq2_qc_pseuso_align_quant", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Get list of samples that failed trimming, mapping, and strand check for MultiQC report + | map { id, state -> + def fail_trimming_header = ["Sample", "Reads after trimming"] + def fail_trimming_multiqc = "" + def star_mapping_header = ["Sample", "STAR uniquely mapped reads (%)"] + def fail_mapping_multiqc = "" + def strand_check_header = ["Sample", "Provided strandedness", "Inferred strandedness", "Sense (%)", "Antisense (%)", "Undetermined (%)"] + def fail_strand_multiqc = "" + if (state.ids.size() > 0) { + fail_trimming_multiqc += "${fail_trimming_header.join('\t')}\n" + fail_mapping_multiqc += "${star_mapping_header.join('\t')}\n" + fail_strand_multiqc += "${strand_check_header.join('\t')}\n" + for (i=0; i + state.each { key, value -> + if (value instanceof ArrayList) { + value.removeAll { it == null } + } + } + mod_state = state.findAll { key, value -> value != null } + [ id, mod_state ] + } + + | prepare_multiqc_input.run( + runIf: { id, state -> !state.skip_qc && !state.skip_multiqc }, + fromState: [ + "fail_trimming_multiqc": "fail_trimming_multiqc", + "fail_mapping_multiqc": "fail_mapping_multiqc", + "fail_strand_multiqc": "fail_strand_multiqc", + "fastqc_raw_multiqc": "fastqc_zip", + "fastqc_trim_multiqc": "trim_zip", + "trim_log_multiqc": "trim_log", + "sortmerna_multiqc": "sortmerna_multiqc", + "star_multiqc": "star_multiqc", + "salmon_multiqc": "salmon_multiqc", + "rsem_multiqc": "rsem_multiqc", + "pseudo_multiqc": "pseudo_multiqc", + "samtools_stats": "genome_bam_stats", + "samtools_flagstat": "genome_bam_flagstat", + "samtools_idxstats": "genome_bam_idxstats", + "markduplicates_multiqc": "markduplicates_multiqc", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "aligner_pca_multiqc": "deseq2_pca_multiqc", + "aligner_clustering_multiqc": "deseq2_dists_multiqc", + "pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo", + "pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo", + "preseq_multiqc": "preseq_output", + // "qualimap_multiqc": "qualimap_output_dir", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "bamstat_multiqc": "bamstat_output", + "inferexperiment_multiqc": "inferexperiment_multiqc", + "innerdistance_multiqc": "inner_dist_output_freq", + "junctionannotation_multiqc": "junction_annotation_output_log", + "junctionsaturation_multiqc": "junction_saturation_output_plot_r", + "readdistribution_multiqc": "read_distribution_output", + "readduplication_multiqc": "read_duplication_output_duplication_rate_mapping", + "tin_multiqc": "tin_output_summary", + "multiqc_config": "multiqc_custom_config" + ], + toState: [ "multiqc_input": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | multiqc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_multiqc }, + fromState: [ + "title": "multiqc_title", + "input": "multiqc_input", + ], + args: [exclude_modules: "general_stats"], + toState: [ + "multiqc_report": "output_report", + "multiqc_data": "output_data", + "multiqc_plots": "output_plots" + ], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + | map { id, state -> + [ + id, [ + tpm_gene: state.tpm_gene, + counts_gene: state.counts_gene, + counts_gene_length_scaled: state.counts_gene_length_scaled, + counts_gene_scaled: state.counts_gene_scaled, + tpm_transcript: state.tpm_transcript, + counts_transcript: state.counts_transcript, + quant_merged_summarizedexperiment: state.quant_merged_summarizedexperiment, + deseq2_output: state.deseq2_output, + pseudo_tpm_gene: state.pseudo_tpm_gene, + pseudo_counts_gene: state.pseudo_counts_gene, + pseudo_counts_gene_length_scaled: state.pseudo_counts_gene_length_scaled, + pseudo_counts_gene_scaled: state.pseudo_counts_gene_scaled, + pseudo_tpm_transcript: state.pseudo_tpm_transcript, + pseudo_counts_transcript: state.pseudo_counts_transcript, + pseudo_quant_merged_summarizedexperiment: state.pseudo_quant_merged_summarizedexperiment, + deseq2_output_pseudo: state.deseq2_output_pseudo, + multiqc_report: state.multiqc_report, + multiqc_data: state.multiqc_data, + multiqc_plots: state.multiqc_plots + ] + ] + } + + | map { list -> list[1]} + + output_ch = qc_ch + + | combine(merged_ch) + + | map { list -> [list[0], list[1] + list[2]] } + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "lengths_gene": "lengths_gene", + "lengths_transcript": "lengths_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + emit: + output_ch +} + +// +// Function to check whether biotype field exists in GTF file +// +def biotypeInGtf(gtf_file, biotype) { + def hits = 0 + gtf_file.eachLine { line -> + def attributes = line.split('\t')[-1].split() + if (attributes.contains(biotype)) { + hits += 1 + } + } + if (hits) { + return true + } else { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " Amend '--featurecounts_group_type' to change this behaviour.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + return false + } +} + +// +// Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output +// +def getInferexperimentStrandedness(inferexperiment_file, cutoff=30) { + def sense = 0 + def antisense = 0 + def undetermined = 0 + inferexperiment_file.eachLine { line -> + def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/ + def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/ + def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/ + def pe_sense_matcher = line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/ + def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/ + if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100 + if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100 + if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100 + if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100 + if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100 + } + def strandedness = 'unstranded' + if (sense >= 100-cutoff) { + strandedness = 'forward' + } else if (antisense >= 100-cutoff) { + strandedness = 'reverse' + } + return [ strandedness, sense, antisense, undetermined ] +} + diff --git a/src/workflows/quality_control/test_run.sh b/src/workflows/quality_control/test_run.sh new file mode 100644 index 0000000..e69de29 diff --git a/src/workflows/rnaseq/config.vsh.yaml b/src/workflows/rnaseq/config.vsh.yaml new file mode 100644 index 0000000..c83a576 --- /dev/null +++ b/src/workflows/rnaseq/config.vsh.yaml @@ -0,0 +1,914 @@ +name: rnaseq +namespace: workflows +description: | + A viash workflow for the nf-core/rnaseq pipeline. + +argument_groups: + - name: Input + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--fastq_1" + type: file + must_exist: true + required: true + multiple: true + multiple_sep: ";" + description: Path to the sample (or read 1 of paired end sample). + - name: "--fastq_2" + type: file + required: false + must_exist: false + multiple: true + multiple_sep: ";" + description: Path to read 2 of the sample. + - name: "--strandedness" + type: string + required: false + default: auto + choices: ["unstranded", "forward", "reverse", "auto"] + description: Sample strand-specificity. Must be one of unstranded, forward, reverse or auto + + - name: Reference genome options + arguments: + - name: "--fasta" + type: file + description: Path to FASTA genome file. + required: true + - name: "--gtf" + type: file + description: Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified. + required: false + - name: "--gff" + type: file + description: Path to GFF3 annotation file. Required if "--gtf" is not specified. + required: false + - name: "--additional_fasta" + type: file + description: FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences. + - name: "--transcript_fasta" + type: file + description: Path to FASTA transcriptome file. + - name: "--gene_bed" + type: file + description: Path to BED file containing gene intervals. This will be created from the GTF file if not specified. + - name: "--splicesites" + type: file + description: Splice sites file required for HISAT2. + - name: "--star_index" + type: file + description: Path to directory or tar.gz archive for pre-built STAR index. + - name: "--rsem_index" + type: file + description: Path to directory or tar.gz archive for pre-built RSEM index. + - name: "--salmon_index" + type: file + description: Path to directory or tar.gz archive for pre-built Salmon index. + - name: "--kallisto_index" + type: file + description: Path to directory or tar.gz archive for pre-built Kallisto index. + - name: "--gencode" + type: boolean_true + description: Specify if the GTF annotation is in GENCODE format. + - name: "--gtf_extra_attributes" + type: string + description: Additional gene identifiers from the input GTF file when running Salmon. More than one value can be specified separated by comma. + default: gene_name + - name: "--gtf_group_features" + type: string + description: Define the attribute type used to group features in the GTF file when running Salmon. + default: gene_id + - name: "--featurecounts_group_type" + type: string + description: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts. + default: gene_biotype + - name: "--featurecounts_feature_type" + type: string + description: By default, the pipeline assigns reads based on the 'exon' attribute within the GTF file. + default: exon + - name: --star_sjdb_gtf_feature_exon + type: string + description: Feature type in GTF file to be used as exons for building transcripts + + - name: Read trimming options + arguments: + - name: "--trimmer" + type: string + description: Specify the trimming tool to use. + choices: ["trimgalore", "fastp"] + default: "trimgalore" + - name: "--min_trimmed_reads" + type: integer + description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low. + default: 10000 + + - name: Read filtering options + arguments: + - name: "--bbsplit_fasta_list" + type: file + description: List of reference genomes (separated by ";") to filter reads against with BBSplit. + multiple: true + - name: "--bbsplit_index" + type: file + description: Path to directory or tar.gz archive for pre-built BBSplit index. + - name: "--remove_ribo_rna" + type: boolean_true + description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA. + - name: "--ribo_database_manifest" + type: file + description: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. + default: src/assets/rrna-db-defaults.txt + + - name: UMI options + arguments: + - name: "--with_umi" + type: boolean_true + description: Enable UMI-based read deduplication. + - name: "--umitools_extract_method" + type: string + description: UMI pattern to use. + default: "string" + choices: [string, regex] + - name: "--umitools_bc_pattern" + type: string + description: The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI. + - name: "--umitools_bc_pattern2" + type: string + description: The UMI barcode pattern to use if the UMI is located in read 2. + - name: "--umi_discard_read" + type: integer + description: After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively. + choices: [0, 1, 2] + default: 0 + - name: "--umitools_umi_separator" + type: string + description: The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software. + default: "_" + - name: "--umitools_grouping_method" + type: string + description: Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently. + choices: [ "unique", "percentile", "cluster", "adjacency", "directional" ] + default: "directional" + - name: "--umi_dedup_stats" + type: boolean_true + description: Generate output stats when running "umi_tools dedup". + + - name: Alignment options + arguments: + - name: "--aligner" + type: string + description: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'. + choices: [star_salmon, star_rsem, hisat2] + default: "star_salmon" + - name: "--pseudo_aligner" + type: string + description: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'. + choices: [salmon, kallisto] + default: "salmon" + - name: "--pseudo_aligner_kmer_size" + type: integer + description: Kmer length passed to indexing step of pseudoaligners. + default: 31 + - name: "--kallisto_quant_fragment_length" + type: double + description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto. + - name: "--kallisto_quant_fragment_length_sd" + type: double + description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto. + - name: "--bam_csi_index" + type: boolean_true + description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes. + - name: "--salmon_quant_libtype" + type: string + description: Override Salmon library type inferred based on strandedness defined in meta object. + - name: "--min_mapped_reads" + type: integer + description: Minimum percentage of uniquely mapped reads below which samples are removed from further processing. + default: 5 + - name: "--stringtie_ignore_gtf" + type: boolean_true + description: Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file. + - name: "--extra_stringtie_args" + type: string + default: '-v' + description: Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline. + - name: "--save_unaligned" + type: boolean_true + description: Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory. + - name: "--save_align_intermeds" + type: boolean_true + description: Save the intermediate BAM files from the alignment step. + - name: "--skip_alignment" + type: boolean_true + description: Skip all of the alignment-based processes within the pipeline. + - name: "--skip_pseudo_alignment" + type: boolean_true + description: Skip all of the pseudo-alignment-based processes within the pipeline. + + - name: Process skipping options + arguments: + - name: "--skip_fastqc" + type: boolean + description: Skip FatQC step. + default: false + - name: "--skip_trimming" + type: boolean + description: Skip the adapter trimming step. + default: false + # See: + # - name: "--skip_bbsplit" + # type: boolean_true + # description: Skip BBSplit for removal of non-reference genome reads. + - name: "--skip_umi_extract" + type: boolean + description: Skip umi_tools extract step. + default: false + - name: "--skip_qc" + type: boolean_true + description: Skip all QC steps except for MultiQC. + - name: "--skip_markduplicates" + type: boolean_true + description: Skip picard MarkDuplicates step. + - name: "--skip_stringtie" + type: boolean_true + description: Skip StringTie. + - name: "--skip_biotype_qc" + type: boolean_true + description: Skip additional featureCounts process for biotype QC. + - name: "--skip_bigwig" + type: boolean_true + description: Skip bigWig file creation. + - name: "--skip_preseq" + type: boolean_true + description: Skip Preseq. + # - name: "--skip_deseq2_qc" + # type: boolean_true + # description: Skip DESeq2 PCA and heatmap plotting. + - name: --skip_dupradar + type: boolean_true + description: Skip dupRadar. + - name: --skip_qualimap + type: boolean_true + description: Skip Qualimap. + - name: --skip_rseqc + type: boolean_true + description: Skip RSeQC. + - name: --skip_multiqc + type: boolean_true + description: Skip MultiQC. + + - name: Other process arguments + arguments: + - name: "--extra_picard_args" + type: string + default: ' --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + description: Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline. + - name: "--extra_preseq_args" + type: string + description: Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline + default: '-verbose -seed 1 -seg_len 100000000' + - name: "--deseq2_vst" + type: boolean + default: true + description: Use vst transformation instead of rlog with DESeq2 + - name: "--rseqc_modules" + type: string + multiple: true + multiple_sep: ";" + description: Specify the RSeQC modules to run_wf (comma-separated list) + default: bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication + choices: [ "bam_stat", "inner_distance", "infer_experiment", "junction_annotation", "junction_saturation", "read_distribution", "read_duplication", "tin" ] + + - name: MultiQC paramenters + arguments: + - name: "--multiqc_custom_config" + type: file + description: | + Custom multiqc configuration file + - name: "--multiqc_title" + type: string + description: | + Custom multiqc title + - name: "--multiqc_methods_description" + type: file + + - name: Output + arguments: + # Reference files + - name: "--output_fasta" + type: file + direction: output + default: reference/genome.fasta + - name: "--output_gtf" + type: file + direction: output + default: reference/gene_annotation.gtf + - name: "--output_transcript_fasta" + type: file + direction: output + default: reference/transcriptome.fasta + - name: "--output_gene_bed" + type: file + direction: output + default: reference/gene_annotation.bed + - name: "--output_star_index" + type: file + direction: output + description: Path to STAR index. + default: reference/index/STAR + - name: "--output_salmon_index" + type: file + direction: output + description: Path to Salmon index. + default: reference/index/Salmon + - name: "--output_bbsplit_index" + type: file + direction: output + description: Path to BBSplit index. + default: reference/index/BBSplit + - name: "--output_kallisto_index" + type: file + direction: output + description: Path to Kallisto index. + default: reference/index/Kallisto + + # fastq + - name: "--output_fastq_1" + type: file + direction: output + required: false + must_exist: false + description: Path to output directory + default: fastq/${id}_r1.fastq.gz + - name: "--output_fastq_2" + type: file + direction: output + required: false + must_exist: false + description: Path to output directory + default: fastq/${id}_r2.fastq.gz + + # FastQC + - name: "--fastqc_html_1" + type: file + direction: output + description: FastQC HTML report for read 1. + required: false + must_exist: false + default: fastqc_raw/${id}_r1.fastqc.html + - name: "--fastqc_html_2" + type: file + direction: output + description: FastQC HTML report for read 2. + required: false + must_exist: false + default: fastqc_raw/${id}_r2.fastqc.html + - name: "--fastqc_zip_1" + type: file + direction: output + description: FastQC report archive for read 1. + required: false + must_exist: false + default: fastqc_raw/${id}_r1.fastqc.zip + - name: "--fastqc_zip_2" + type: file + direction: output + description: FastQC report archive for read 2. + required: false + must_exist: false + default: fastqc_raw/${id}_r2.fastqc.zip + - name: "--trim_html_1" + type: file + direction: output + required: false + must_exist: false + default: fastqc_trim/${id}_r1.trimmed_fastqc.html + - name: "--trim_html_2" + type: file + direction: output + required: false + must_exist: false + default: fastqc_trim/${id}_r2.trimmed_fastqc.html + - name: "--trim_zip_1" + type: file + direction: output + required: false + must_exist: false + default: fastqc_trim/${id}_r1.trimmed_fastqc.zip + - name: "--trim_zip_2" + type: file + direction: output + required: false + must_exist: false + default: fastqc_trim/${id}_r2.trimmed_fastqc.zip + + # TrimGalore + - name: "--trim_log_1" + type: file + direction: output + required: false + must_exist: false + default: trimgalore/${id}_r1.trimming_report.txt + - name: "--trim_log_2" + type: file + direction: output + required: false + must_exist: false + default: trimgalore/${id}_r2.trimming_report.txt + + # fastp + - name: --fastp_trim_json + type: file + description: The fastp json format report file name + default: fastp/$id_out.json + direction: output + - name: --fastp_trim_html + type: file + description: The fastp html format report file name + default: fastp/$id_out.html + direction: output + + # SortMeRNA + - name: "--sortmerna_log" + type: file + direction: output + default: sortmerna/$id.log + required: false + must_exist: false + description: Sortmerna log file. + + # STAR + - name: "--star_alignment" + type: file + direction: output + default: STAR/$id + - name: "--genome_bam_sorted" + type: file + direction: output + default: STAR/genome_processed/$id.genome.bam + - name: "--genome_bam_index" + type: file + direction: output + default: STAR/genome_processed/$id.genome.bam.bai + - name: "--transcriptome_bam" + type: file + direction: output + default: STAR/transcriptome_processed/$id.transcriptome.bam + - name: "--transcriptome_bam_index" + type: file + direction: output + default: STAR/transcriptome_processed/$id.transcriptome.bam.bai + - name: "--star_log" + type: file + direction: output + default: STAR/log/$id.log + + # samtools + - name: "--genome_bam_stats" + type: file + direction: output + default: samtools_stats/$id.genome.stats + - name: "--genome_bam_flagstat" + type: file + direction: output + default: samtools_stats/$id.genome.flagstat + - name: "--genome_bam_idxstats" + type: file + direction: output + default: samtools_stats/$id.genome.idxstats + - name: "--transcriptome_bam_stats" + type: file + direction: output + default: samtools_stats/$id.transcriptome.stats + - name: "--transcriptome_bam_flagstat" + type: file + direction: output + default: samtools_stats/$id.transcriptome.flagstat + - name: "--transcriptome_bam_idxstats" + type: file + direction: output + default: samtools_stats/$id.transcriptome.idxstats + + # Transcript quantification + - name: "--salmon_quant_results" + type: file + direction: output + default: STAR_Salmon/$id + - name: "--salmon_quant_results_file" + type: file + direction: output + default: STAR_Salmon/$id/quant.sf + - name: "--pseudo_quant_results" + type: file + direction: output + default: Pseudo_align_quant/$id + + # RSEM + - name: "--rsem_counts_gene" + type: file + description: Expression counts on gene level + default: RSEM/$id.genes.results + direction: output + - name: "--rsem_counts_transcripts" + type: file + description: Expression counts on transcript level + default: RSEM/$id.isoforms.results + direction: output + - name: "--bam_star_rsem" + type: file + description: BAM file generated by STAR (from RSEM) + default: RSEM/$id.STAR.genome.bam + direction: output + - name: "--bam_genome_rsem" + type: file + description: Genome BAM file (from RSEM) + default: RSEM/$id.genome.bam + direction: output + - name: "--bam_transcript_rsem" + type: file + description: Transcript BAM file (from RSEM) + default: RSEM/$id.transcript.bam + direction: output + + # Quantification (alignment) + - name: "--tpm_gene" + type: file + direction: output + default: transcript_quantification/gene_tpm.tsv + - name: "--counts_gene" + type: file + direction: output + default: transcript_quantification/gene_counts.tsv + - name: "--counts_gene_length_scaled" + type: file + direction: output + default: transcript_quantification/gene_counts_length_scaled.tsv + - name: "--counts_gene_scaled" + type: file + direction: output + default: transcript_quantification/gene_counts_scaled.tsv + - name: "--tpm_transcript" + type: file + direction: output + default: transcript_quantification/transcript_tpm.tsv + - name: "--counts_transcript" + type: file + direction: output + default: transcript_quantification/transcript_counts.tsv + - name: "--quant_merged_summarizedexperiment" + type: file + direction: output + default: transcript_quantification/summarizedexperiment + + # MarkDuplicates + - name: "--markduplicates_metrics" + type: file + direction: output + default: picard/$id.MarkDuplicates.metrics.txt + + # StringTie + - name: "--stringtie_transcript_gtf" + type: file + direction: output + default: stringtie/$id.transcripts.gtf + - name: "--stringtie_coverage_gtf" + type: file + direction: output + default: stringtie/$id.coverage.gtf + - name: "--stringtie_abundance" + type: file + direction: output + default: stringtie/$id.gene_abundance.txt + - name: "--stringtie_ballgown" + type: file + direction: output + default: stringtie/$id.ballgown + + # featureCounts + - name: "--featurecounts" + type: file + direction: output + default: featurecounts/$id.featureCounts.txt + - name: "--featurecounts_summary" + type: file + direction: output + default: featurecounts/$id.featureCounts.txt.summary + - name: "--featurecounts_multiqc" + type: file + direction: output + must_exist: false + default: featurecounts/$id.featureCounts_mqc.tsv + - name: "--featurecounts_rrna_multiqc" + type: file + direction: output + must_exist: false + default: featurecounts/$id.featureCounts_rrna_mqc.tsv + + # bedGraph + - name: "--bedgraph_forward" + type: file + direction: output + default: bedgraph/$id.forward.bedgraph + - name: "--bedgraph_reverse" + type: file + direction: output + default: bedgraph/$id.reverse.bedgraph + + # bigWig + - name: "--bigwig_forward" + type: file + direction: output + default: bigwig/$id.forward.bigwig + - name: "--bigwig_reverse" + type: file + direction: output + default: bigwig/$id.reverse.bigwig + + # preseq lc_extrap + - name: "--preseq_output" + type: file + direction: output + default: preseq/$id.lc_extrap.txt + + # RSeQC + - name: "--bamstat_output" + type: file + direction: output + required: false + description: Path to output file (txt) of mapping quality statistics + default: RSeQC/bamstat/$id.mapping_quality.txt + - name: "--strandedness_output" + type: file + direction: output + required: false + default: RSeQC/inferexperiment/$id.strandedness.txt + description: Path to output report (txt) of inferred strandedness + - name: "--inner_dist_output_stats" + type: file + direction: output + must_exist: false + default: RSeQC/innerdistance/$id.inner_distance.stats + description: output file (txt) with summary statistics of inner distances of paired reads + - name: "--inner_dist_output_dist" + type: file + direction: output + must_exist: false + default: RSeQC/innerdistance/txt/$id.inner_distance.txt + description: output file (txt) with inner distances of all paired reads + - name: "--inner_dist_output_freq" + type: file + direction: output + must_exist: false + default: RSeQC/innerdistance/txt/$id.inner_distance_freq.txt + description: output file (txt) with frequencies of inner distances of all paired reads + - name: "--inner_dist_output_plot" + type: file + direction: output + must_exist: false + default: RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf + description: output file (pdf) with histogram plot of of inner distances of all paired reads + - name: "--inner_dist_output_plot_r" + type: file + direction: output + must_exist: false + default: RSeQC/innerdistance/rscript/$id.inner_distance_plot.r + description: output file (R) with script of histogram plot of of inner distances of all paired reads + - name: "--junction_annotation_output_log" + type: file + direction: output + required: false + default: RSeQC/junctionannotation/log/$id.junction_annotation.log + description: output log of junction annotation script + - name: "--junction_annotation_output_plot_r" + type: file + direction: output + required: false + default: RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r + description: R script to generate splice_junction and splice_events plot + - name: "--junction_annotation_output_junction_bed" + type: file + direction: output + required: false + default: RSeQC/junctionannotation/bed/$id.junction_annotation.bed + description: junction annotation file (bed format) + - name: "--junction_annotation_output_junction_interact" + type: file + direction: output + required: false + default: RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed + description: interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization. + - name: "--junction_annotation_output_junction_sheet" + type: file + direction: output + required: false + default: RSeQC/junctionannotation/xls/$id.junction_annotation.xls + description: junction annotation file (xls format) + - name: "--junction_annotation_output_splice_events_plot" + type: file + direction: output + required: false + default: RSeQC/junctionannotation/pdf/$id.splice_events.pdf + description: plot of splice events (pdf) + - name: "--junction_annotation_output_splice_junctions_plot" + type: file + direction: output + required: false + default: RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf + description: plot of junctions (pdf) + - name: "--junction_saturation_output_plot_r" + type: file + direction: output + required: false + default: RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r + description: r script to generate junction_saturation_plot plot + - name: "--junction_saturation_output_plot" + type: file + direction: output + required: false + default: RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf + description: plot of junction saturation (pdf + - name: "--read_distribution_output" + type: file + direction: output + required: false + default: RSeQC/readdistribution/$id.read_distribution.txt + description: output file (txt) of read distribution analysis. + - name: "--read_duplication_output_duplication_rate_plot_r" + type: file + direction: output + required: false + default: RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r + description: R script for generating duplication rate plot + - name: "--read_duplication_output_duplication_rate_plot" + type: file + direction: output + required: false + default: RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf + description: duplication rate plot (pdf) + - name: "--read_duplication_output_duplication_rate_mapping" + type: file + direction: output + required: false + default: RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls + description: Summary of mapping-based read duplication + - name: "--read_duplication_output_duplication_rate_sequence" + type: file + direction: output + required: false + default: RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls + description: Summary of sequencing-based read duplication + - name: "--tin_output_summary" + type: file + direction: output + required: false + default: RSeQC/tin/txt/$id.tin_summary.txt + description: summary statistics (txt) of calculated TIN metrics + - name: "--tin_output_metrics" + type: file + direction: output + required: false + default: RSeQC/tin/xls/$id.tin.xls + description: file with TIN metrics (xls) + + # DupRadar + - name: "--dupradar_output_dupmatrix" + type: file + direction: output + required: false + default: dupradar/gene_data/$id.dup_matrix.txt + description: path to output file (txt) of duplicate tag counts + - name: "--dupradar_output_dup_intercept_mqc" + type: file + direction: output + required: false + default: dupradar/mqc_intercept/$id.dup_intercept_mqc.txt + description: path to output file (txt) of multiqc intercept value DupRadar + - name: "--dupradar_output_duprate_exp_boxplot" + type: file + direction: output + required: false + default: dupradar/box_plot/$id.duprate_exp_boxplot.pdf + description: path to output file (pdf) of distribution of expression box plot + - name: "--dupradar_output_duprate_exp_densplot" + type: file + direction: output + required: false + default: dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf + description: path to output file (pdf) of 2D density scatter plot of duplicate tag counts + - name: "--dupradar_output_duprate_exp_denscurve_mqc" + type: file + direction: output + required: false + default: dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf + description: path to output file (pdf) of density curve of gene duplication multiqc + - name: "--dupradar_output_expression_histogram" + type: file + direction: output + required: false + default: dupradar/histogram/$id.expression_hist.pdf + description: path to output file (pdf) of distribution of RPK values per gene histogram + - name: "--dupradar_output_intercept_slope" + type: file + direction: output + required: false + default: dupradar/intercept_slope/$id.intercept_slope.txt + + # Qualimap + - name: "--qualimap_qc_report" + direction: output + type: file + default: Qualimap/$id.rnaseq_qc_results.txt + description: Text file containing the RNAseq QC results. + - name: "--qualimap_counts" + type: file + direction: output + default: Qualimap/$id.counts.txt + description: Output file for computed counts. + - name: "--qualimap_report" + type: file + direction: output + default: Qualimap/$id.report.html + description: Report output file. Supported formats are PDF or HTML. + + # DESeq2 + - name: "--deseq2_output" + type: file + direction: output + default: deseq2_qc + - name: "--deseq2_output_pseudo" + type: file + direction: output + default: deseq2_qc_pseudo + + # MultiQC + - name: "--multiqc_report" + type: file + direction: output + default: multiqc/multiqc_report.html + - name: "--multiqc_data" + type: file + direction: output + default: multiqc/multiqc_data + - name: "--multiqc_plots" + type: file + direction: output + default: multiqc/multiqc_plots + - name: "--multiqc_versions" + type: file + direction: output + + # Quantification (pseudo alignment) + - name: "--pseudo_counts_gene" + type: file + direction: output + default: pseudo_alignment_quantification/gene_counts.tsv + - name: "--pseudo_counts_gene_length_scaled" + type: file + direction: output + default: pseudo_alignment_quantification/gene_counts_length_scaled.tsv + - name: "--pseudo_counts_gene_scaled" + type: file + direction: output + default: pseudo_alignment_quantification/gene_counts_scaled.tsv + - name: "--pseudo_tpm_gene" + type: file + direction: output + default: pseudo_alignment_quantification/gene_tpm.tsv + - name: "--pseudo_tpm_transcript" + type: file + direction: output + default: pseudo_alignment_quantification/transcript_tpm.tsv + - name: "--pseudo_counts_transcript" + type: file + direction: output + default: pseudo_alignment_quantification/transcript_counts.tsv + - name: "--pseudo_quant_merged_summarizedexperiment" + type: file + direction: output + default: pseudo_alignment_quantification/quant_merged_summarizedexperiment + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +test_resources: + - type: nextflow_script + path: test.nf + entrypoint: test_wf + +dependencies: + - name: workflows/prepare_genome + - name: cat_fastq + - name: workflows/pre_processing + - name: workflows/genome_alignment_and_quant + - name: workflows/pseudo_alignment_and_quant + - name: workflows/post_processing + - name: workflows/quality_control + +runners: +- type: executable +- type: nextflow diff --git a/src/workflows/rnaseq/main.nf b/src/workflows/rnaseq/main.nf new file mode 100644 index 0000000..f974d00 --- /dev/null +++ b/src/workflows/rnaseq/main.nf @@ -0,0 +1,668 @@ +workflow run_wf { + take: + input_ch + + main: + + reference_ch = input_ch + + | map { id, state -> + def biotype = state.gencode ? "gene_type" : state.featurecounts_group_type + def filter_gtf = + ( + ( !state.skip_alignment && state.aligner) // Condition 1: Alignment is required and aligner is set + || ( !state.skip_pseudo_alignment && state.pseudo_aligner ) // Condition 2: Pseudoalignment is required and pseudoaligner is set + || ( !state.transcript_fasta ) // Condition 3: Transcript FASTA file is not provided + ) + && + ( !state.skip_gtf_filter ) // Condition 4: --skip_gtf_filter is not provided + [ id, state + [ biotype: biotype, filter_gtf: filter_gtf ] ] + } + + | toSortedList + + | map { list -> + [ "ref", [ + fasta: list.collect { id, state -> state.fasta }.unique()[0], + gtf: list.collect { id, state -> state.gtf }.unique()[0], + gff: list.collect { id, state -> state.gff }.unique()[0], + additional_fasta: list.collect { id, state -> state.additional_fasta }.unique()[0], + transcript_fasta:list.collect { id, state -> state.transcript_fasta }.unique()[0], + gene_bed: list.collect { id, state -> state.gene_bed }.unique()[0], + bbsplit_fasta_list: list.collect { id, state -> state.bbsplit_fasta_list }.unique()[0], + aligner: list.collect { id, state -> state.aligner }.unique()[0], + pseudo_aligner: list.collect { id, state -> state.pseudo_aligner }.unique()[0], + star_index: list.collect { id, state -> state.star_index }.unique()[0], + rsem_index: list.collect { id, state -> state.rsem_index }.unique()[0], + salmon_index: list.collect { id, state -> state.salmon_index }.unique()[0], + kallisto_index: list.collect { id, state -> state.kallisto_index }.unique()[0], + // splicesites: list.collect { id, state -> state.splicesites }.unique()[0], + // hisat2_index: list.collect { id, state -> state.hisat2_index }.unique()[0], + bbsplit_index: list.collect { id, state -> state.bbsplit_index }.unique()[0], + // See: + skip_bbsplit: true, // list.collect { id, state -> state.skip_bbsplit }.unique()[0], + skip_alignment: list.collect { id, state -> state.skip_alignment }.unique()[0], + gencode: list.collect { id, state -> state.gencode }.unique()[0], + biotype: list.collect { id, state -> state.biotype }.unique()[0], + star_sjdb_gtf_feature_exon: list.collect { id, state -> state.star_sjdb_gtf_feature_exon }.unique()[0], + filter_gtf: list.collect { id, state -> state.filter_gtf }.unique()[0], + pseudo_aligner_kmer_size: list.collect { id, state -> state.pseudo_aligner_kmer_size }.unique()[0] ] + ] + } + + // prepare all the necessary files for reference genome + | prepare_genome.run ( + fromState: [ + "fasta": "fasta", + "gtf": "gtf", + "gff": "gff", + "additional_fasta": "additional_fasta", + "transcript_fasta": "transcript_fasta", + "gene_bed": "gene_bed", + "bbsplit_fasta_list": "bbsplit_fasta_list", + "star_index": "star_index", + "rsem_index": "rsem_index", + "salmon_index": "salmon_index", + "kallisto_index": "kallisto_index", + "pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size", + // "splicesites": "splicesites", + // "hisat2_index": "hisat2_index", + "bbsplit_index": "bbsplit_index", + "skip_bbsplit": "skip_bbsplit", + "gencode": "gencode", + "biotype": "biotype", + "filter_gtf": "filter_gtf", + "aligner": "aligner", + "pseudo_aligner": "pseudo_aligner", + "skip_alignment": "skip_alignment", + "star_sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ + "fasta": "uncompressed_fasta", + "gtf": "gtf_uncompressed", + "transcript_fasta": "transcript_fasta_uncompressed", + "fai": "fai", + "chrom_sizes": "chrom_sizes", + "bbsplit_index": "bbsplit_index_uncompressed", + "star_index": "star_index_uncompressed", + "salmon_index": "salmon_index_uncompressed", + "kallisto_index": "kallisto_index_uncompressed", + "gene_bed": "gene_bed_uncompressed" + ] + ) + + // Check if contigs in genome fasta file > 512 Mbp + | map { id, state -> + (isBelowMaxContigSize(state.fai)) ? [id, state] : [id, state + [bam_csi_index: true]] + } + + // Pick out the state + | map { list -> list[1]} + + analysis_ch = input_ch + + | combine(reference_ch) + + | map { list -> [list[0], list[1] + list[2]] } + + // Concatenate FastQ files from same sample if required + | cat_fastq.run ( + fromState: [ + "read_1": "fastq_1", + "read_2": "fastq_2" + ], + toState: [ + "fastq_1": "fastq_1", + "fastq_2": "fastq_2" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // Pre-process fastq files + | pre_processing.run ( + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "umitools_bc_pattern": "umitools_bc_pattern", + "umitools_bc_pattern2": "umitools_bc_pattern2", + "strandedness": "strandedness", + "transcript_fasta": "transcript_fasta", + "gtf": "gtf", + "with_umi": "with_umi", + "bbsplit_index": "bbsplit_index", + "bbsplit_fasta_list": "bbsplit_fasta_list", + "bc_pattern": "bc_pattern", + "ribo_database_manifest": "ribo_database_manifest", + "salmon_index": "salmon_index", + "skip_qc": "skip_qc", + "skip_fastqc": "skip_fastqc", + "skip_skip_umi_extract": "skip_umi_extract", + "umi_discard_read": "umi_discard_read", + "skip_trimming": "skip_trimming", + "trimmer": "trimmer", + "skip_bbsplit": "skip_bbsplit", + "remove_ribo_rna": "remove_ribo_rna" + ], + toState: [ + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "fastq_1": "qc_output1", + "fastq_2": "qc_output2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "passed_trimmed_reads": "passed_trimmed_reads", + "num_trimmed_reads": "num_trimmed_reads", + "sortmerna_log": "sortmerna_log", + "salmon_quant_output": "salmon_quant_output", + "fastp_failed_trim": "failed_trim", + "fastp_failed_trim_unpaired1": "failed_trim_unpaired1", + "fastp_failed_trim_unpaired2": "failed_trim_unpaired2", + "fastp_trim_json": "trim_json", + "fastp_trim_html": "trim_html", + "fastp_trim_merged_out": "trim_merged_out" + ] + ) + + // Infer strandedness from Salmon pseudo-alignment results + | map { id, state -> + (state.strandedness == 'auto') ? + [ id, state + [strandedness: getSalmonInferredStrandedness(state.salmon_quant_output)] ] : + [id, state] + } + + // Filter FastQ files based on minimum trimmed read count after adapter trimming + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def num_reads = (state.skip_trimming) ? + state.min_trimmed_reads + 1 : + ( + (state.trimmer == "fastp") ? + getFastpReadsAfterFiltering(state.fastp_trim_json) : + ( + (!state.skip_trimming && input.size() == 2) ? + getTrimGaloreReadsAfterFiltering(state.trim_log_2) : + getTrimGaloreReadsAfterFiltering(state.trim_log_1) + ) + ) + def passed_trimmed_reads = + (state.skip_trimming || (num_reads >= state.min_trimmed_reads)) ? + true : + false + [ id, state + [num_trimmed_reads: num_reads, passed_trimmed_reads: passed_trimmed_reads] ] + } + + // Genome alignment and quantification + | genome_alignment_and_quant.run ( + runIf: { id, state -> !state.skip_alignment && state.passed_trimmed_reads }, + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "strandedness": "strandedness", + "gtf": "gtf", + "transcript_fasta": "transcript_fasta", + "bam_csi_index": "bam_csi_index", + "aligner": "aligner", + "rsem_index": "rsem_index", + "star_index": "star_index", + "star_sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon", + "star_ignore_sjdbgtf": "star_ignore_sjdbgtf", + "with_umi": "with_umi", + "umi_dedup_stats": "umi_dedup_stats", + "gtf_group_features": "gtf_group_features", + "gtf_extra_attributes": "gtf_extra_attributes", + "salmon_quant_libtype": "salmon_quant_libtype", + "salmon_index": "salmon_index", + "extra_rsem_calculate_expression_args": "extra_rsem_calculate_expression_args" + ], + toState: [ + "star_multiqc": "star_multiqc", + "rsem_multiqc": "rsem_multiqc", + "salmon_multiqc": "salmon_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "quant_out_dir": "quant_out_dir", + "quant_results_file": "quant_results_file", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "bam_genome_rsem": "bam_genome_rsem", + "bam_transcript_rsem": "bam_transcript_rsem" + ] + ) + + // Filter channels to get samples that passed STAR minimum mapping percentage + | map { id, state -> + def percent_mapped = (!state.skip_alignment && state.passed_trimmed_reads) ? getStarPercentMapped(state.star_multiqc) : 0.0 + def passed_mapping = (percent_mapped >= state.min_mapped_reads) ? true : false + [ id, state + [percent_mapped: percent_mapped, passed_mapping: passed_mapping] ] + } + + // Pseudo-alignment and quantification + | pseudo_alignment_and_quant.run ( + runIf: { id, state -> !state.skip_pseudo_alignment && state.passed_trimmed_reads }, + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "strandedness": "strandedness", + "gtf": "gtf", + "transcript_fasta": "transcript_fasta", + "pseudo_aligner": "pseudo_aligner", + "salmon_index": "salmon_index", + "kallisto_index": "kallisto_index", + "extra_star_align_args": "extra_star_align_args", + "star_ignore_sjdbgtf": "star_ignore_sjdbgtf", + "seq_platform": "seq_platform", + "seq_center": "seq_center", + "with_umi": "with_umi", + "umi_dedup_stats": "umi_dedup_stats", + "gtf_group_features": "gtf_group_features", + "gtf_extra_attributes": "gtf_extra_attributes", + "lib_type": "salmon_quant_libtype", + "kallisto_quant_fragment_length": "kallisto_quant_fragment_length", + "kallisto_quant_fragment_length_sd": "kallisto_quant_fragment_length_sd" + ], + toState: [ + "pseudo_quant_out_dir": "quant_out_dir", + "pseudo_salmon_quant_results_file": "salmon_quant_results_file", + "pseudo_kallisto_quant_results_file": "kallisto_quant_results_file", + "pseudo_multiqc": "pseudo_multiqc" + ] + ) + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired ] ] + } + + // Post-processing + | post_processing.run ( + runIf: { id, state -> !state.skip_alignment && state.passed_trimmed_reads && state.passed_mapping }, + fromState: [ + "id": "id", + "paired": "paired", + "strandedness": "strandedness", + "fasta": "fasta", + "fai": "fai", + "gtf": "gtf", + "genome_bam": "genome_bam_sorted", + "chrom_sizes": "chrom_sizes", + "star_multiqc": "star_multiqc", + "extra_picard_args": "extra_picard_args", + "extra_stringtie_args": "extra_stringtie_args", + "stringtie_ignore_gtf": "stringtie_ignore_gtf", + "extra_bedtools_args": "extra_bedtools_args", + "bam_csi_index": "bam_csi_index", + "min_mapped_reads": "min_mapped_reads", + "with_umi": "with_umi", + "skip_qc": "skip_qc", + "skip_markduplicates": "skip_markduplicates", + "skip_stringtie": "skip_stringtie", + "skip_bigwig":"gencode" + ], + toState: [ + "genome_bam_sorted": "processed_genome_bam", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse" + ] + ) + + // Final QC + + // Temporarily skip DESeq2 QC for now + // https://github.com/viash-hub/rnaseq/issues/31 + | map{ id, state -> [ id, state + [ skip_deseq2_qc: true ] ] } + + | quality_control.run ( + fromState: [ + "id": "id", + "paired": "paired", + "strandedness": "strandedness", + "skip_align": "skip_alignment", + "skip_pseudo_align": "skip_pseudo_alignment", + "skip_dupradar": "skip_dupradar", + "skip_qualimap": "skip_qualimap", + "skip_rseqc": "skip_rseqc", + "skip_multiqc": "skip_multiqc", + "skip_preseq": "skip_preseq", + "gtf": "gtf", + "num_trimmed_reads": "num_trimmed_reads", + "passed_trimmed_reads": "passed_trimmed_reads", + "passed_mapping": "passed_mapping", + "percent_mapped": "percent_mapped", + "genome_bam": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "salmon_multiqc": "salmon_multiqc", + "quant_results_file": "quant_results_file", + "rsem_multiqc": "rsem_multiqc", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "pseudo_multiqc": "pseudo_multiqc", + "pseudo_quant_out_dir": "pseudo_quant_out_dir", + "pseudo_salmon_quant_results_file": "pseudo_salmon_quant_results_file", + "pseudo_kallisto_quant_results_file": "pseudo_kallisto_quant_results_file", + "aligner": "aligner", + "pseudo_aligner": "pseudo_aligner", + "gene_bed": "gene_bed", + "extra_preseq_args": "extra_preseq_args", + "biotype": "biotype", + "skip_biotype_qc": "skip_biotype_qc", + "featurecounts_group_type": "featurecounts_group_type", + "featurecounts_feature_type": "featurecounts_feature_type", + "gencode": "gencode", + "skip_deseq2_qc": "skip_deseq2_qc", + "deseq2_vst": "deseq2_vst", + "multiqc_custom_config": "multiqc_custom_config", + "multiqc_title": "multiqc_title", + "multiqc_methods_description": "multiqc_methods_description", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "sortmerna_multiqc": "sortmerna_log", + "star_multiqc": "star_multiqc", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_multiqc": "markduplicates_metrics", + "rseqc_modules": "rseqc_modules" + ], + toState: [ + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ + "output_fasta": "fasta", + "output_gtf": "gtf", + "output_transcript_fasta": "transcript_fasta", + "output_gene_bed": "gene_bed", + "output_bbsplit_index": "bbsplit_index", + "output_star_index": "star_index", + "output_salmon_index": "salmon_index", + "output_kallisto_index": "kallisto_index", + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "output_fastq_1": "fastq_1", + "output_fastq_2": "fastq_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "sortmerna_log": "sortmerna_log", + "star_log": "star_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "salmon_quant_results": "quant_out_dir", + "pseudo_quant_results": "pseudo_quant_out_dir", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse", + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + emit: + analysis_ch +} + +import nextflow.Nextflow +// +// Function to generate an error if contigs in genome fasta file > 512 Mbp +// +def isBelowMaxContigSize(fai_file) { + def max_size = 512000000 + fai_file.eachLine { line -> + def lspl = line.split('\t') + def chrom = lspl[0] + def size = lspl[1] + if (size.toInteger() > max_size) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Contig longer than ${max_size}bp found in reference genome!\n\n" + + " ${chrom}: ${size}\n\n" + + " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + return false + } + } + return true +} + +// Find a file in a directory +import java.nio.file.* +def findFile(Path dir, String fileName) { + Files.walk(dir) + .filter { Files.isRegularFile(it) && it.fileName.toString() == fileName } + .findFirst() + .orElse(null) +} + +import groovy.json.JsonSlurper +// +// Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness +// +def getSalmonInferredStrandedness(salmon_quant_output) { + def json_file = findFile(salmon_quant_output, 'meta_info.json') + def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0] + def strandedness = 'reverse' + if (lib_type) { + if (lib_type in ['U', 'IU']) { + strandedness = 'unstranded' + } + else if (lib_type in ['SF', 'ISF']) { + strandedness = 'forward' + } + else if (lib_type in ['SR', 'ISR']) { + strandedness = 'reverse' + } + } + return strandedness +} + +// +// Function that parses TrimGalore log output file to get total number of reads after trimming +// +def getTrimGaloreReadsAfterFiltering(log_file) { + def total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ + def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() + } + return total_reads - filtered_reads +} + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +// +// Function that parses and returns the alignment rate from the STAR log outputs +// +def getStarPercentMapped(align_log) { + def percent_aligned = 0 + def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/ + align_log.eachLine { line -> + def matcher = line =~ pattern + if (matcher) { + percent_aligned = matcher[0][1].toFloat() + } + } + return percent_aligned +} diff --git a/src/workflows/rnaseq/nextflow.config b/src/workflows/rnaseq/nextflow.config new file mode 100644 index 0000000..39ba610 --- /dev/null +++ b/src/workflows/rnaseq/nextflow.config @@ -0,0 +1,14 @@ +manifest { + nextflowVersion = '!>=20.12.1-edge' +} + +params { + rootDir = java.nio.file.Paths.get("$projectDir/../../../").toAbsolutePath().normalize().toString() +} + +// Some important settings +docker.fixOwnership = true +process.container = "nextflow/nextflow:24.04.2" + +// include common settings +includeConfig("${params.rootDir}/src/workflows/utils/labels.config") diff --git a/src/workflows/rnaseq/test.nf b/src/workflows/rnaseq/test.nf new file mode 100644 index 0000000..b57f2ea --- /dev/null +++ b/src/workflows/rnaseq/test.nf @@ -0,0 +1,53 @@ +include { rnaseq } from params.rootDir + "/target/nextflow/workflows/rnaseq/main.nf" + +params.resources_test = params.rootDir + "/resources_test" + +workflow test_wf { + + resources_test = file(params.resources_test) + + output_ch = + + Channel.fromList([ + [ + id: "_test", + fastq_1: "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz;" + + "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz" , + fastq_2: "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz;" + + "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz", + strandedness: "reverse", + fasta: "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/genome.fasta", + gtf: "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/genes.gtf.gz", + additional_fasta: "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/gfp.fa.gz", + transcript_fasta: "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/transcriptome.fasta", + salmon_index: "https://github.com/nf-core/test-datasets/raw/refs/heads/rnaseq/reference/salmon.tar.gz", + publish_dir: "output/", + skip_deseq2_qc: true + ] + ]) + + | map { state -> [state.id, state] } + + | rnaseq.run( + fromState: { id, state -> state }, + toState: { id, output, state -> output } + ) + + | view { output -> + assert output.size() == 2 : "Outputs should contain two elements; [id, state]" + + // check id + def id = output[0] + assert id.endsWith("_test") + + // check output + def state = output[1] + assert state instanceof Map : "State should be a map. Found: ${state}" + assert state.containsKey("output_fasta") : "Output should contain key 'output_fasta'." + assert state.output_fasta.isFile() : "'output_fasta' should be a file." + assert state.containsKey("output_gtf") : "Output should contain key 'output_gtf'." + assert state.output_gtf.isFile() : "'output_gtf' should be a file." + + "Output: $output" + } +} diff --git a/src/workflows/rnaseq/test_run.sh b/src/workflows/rnaseq/test_run.sh new file mode 100755 index 0000000..ecca3a9 --- /dev/null +++ b/src/workflows/rnaseq/test_run.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +viash ns build --setup cb --parallel + +cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE +id,fastq_1,fastq_2,strandedness +WT_REP1,SRR6357070_1.fastq.gz;SRR6357071_1.fastq.gz,SRR6357070_2.fastq.gz;SRR6357071_2.fastq.gz,reverse +WT_REP2,SRR6357072_1.fastq.gz,SRR6357072_2.fastq.gz,reverse +RAP1_IAA_30M_REP1,SRR6357076_1.fastq.gz,SRR6357076_2.fastq.gz,reverse +RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse +RAP1_UNINDUCED_REP2,SRR6357074_1.fastq.gz;SRR6357075_1.fastq.gz,,reverse +HERE + +echo ">> Test 1: Trimming reads with Trim galore; alignment with STAR and quantification with Salmon" +nextflow run target/nextflow/workflows/rnaseq/main.nf \ + --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ + --publish_dir test_results/full_pipeline_test1 \ + --fasta testData/minimal_test/reference/genome.fasta \ + --gtf testData/minimal_test/reference/genes.gtf.gz \ + --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ + --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ + --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ + --skip_pseudo_alignment \ + -profile docker \ + --resume + + +# echo ">> Test 2: Trimming reads with Trim galore; alignment with STAR and quantification with Salmon; pseudo-alignment and quantification with Kallisto" +# nextflow run target/nextflow/workflows/rnaseq/main.nf \ +# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ +# --publish_dir test_results/full_pipeline_test2 \ +# --fasta testData/minimal_test/reference/genome.fasta \ +# --gtf testData/minimal_test/reference/genes.gtf.gz \ +# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ +# --pseudo_aligner kallisto \ +# --kallisto_quant_fragment_length 100 \ +# --kallisto_quant_fragment_length_sd 10 \ +# -profile docker --resume + + +# echo ">> Test 3: Trimming reads with fastp; skip alignment; pseudo alignment and quantification with Salmon" +# nextflow run target/nextflow/workflows/rnaseq/main.nf \ +# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ +# --publish_dir test_results/full_pipeline_test3 \ +# --fasta testData/minimal_test/reference/genome.fasta \ +# --gtf testData/minimal_test/reference/genes.gtf.gz \ +# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ +# --trimmer fastp \ +# --skip_alignment \ +# -profile docker --resume + + +# echo ">> Test 4: Trimming reads with Trim galore; alignment and quantification with RSEM (STAR)" +# nextflow run target/nextflow/workflows/rnaseq/main.nf \ +# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ +# --publish_dir test_results/full_pipeline_test4 \ +# --fasta testData/minimal_test/reference/genome.fasta \ +# --gtf testData/minimal_test/reference/genes.gtf.gz \ +# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ +# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ +# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ +# --aligner star_rsem \ +# --skip_pseudo_alignment \ +# -profile docker \ +# --resume diff --git a/src/workflows/utils/labels.config b/src/workflows/utils/labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/src/workflows/utils/labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/.build.yaml b/target/.build.yaml new file mode 100644 index 0000000..e69de29 diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml new file mode 100644 index 0000000..e6d6beb --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml @@ -0,0 +1,409 @@ +name: "bbmap_bbsplit" +namespace: "bbmap" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "Sample ID" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--paired" + description: "Paired fastq files or not?" + info: null + direction: "input" + - type: "file" + name: "--input" + description: "Input fastq files, either one or two (paired), separated by \";\"\ + ." + info: null + example: + - "reads.fastq" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--ref" + description: "Reference FASTA files, separated by \";\". The primary reference\ + \ should be specified first." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--only_build_index" + description: "If set, only builds the index. Otherwise, mapping is performed." + info: null + direction: "input" + - type: "file" + name: "--build" + description: "Index to be used for mapping. \n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--qin" + description: "Set to 33 or 64 to specify input quality value ASCII offset. Automatically\ + \ detected if\nnot specified.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--interleaved" + description: "True forces paired/interleaved input; false forces single-ended\ + \ mapping.\nIf not specified, interleaved status will be autodetected from read\ + \ names.\n" + info: null + direction: "input" + - type: "integer" + name: "--maxindel" + description: "Don't look for indels longer than this. Lower is faster. Set to\ + \ >=100k for RNA-seq.\n" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--minratio" + description: "Fraction of max alignment score required to keep a site. Higher\ + \ is faster.\n" + info: null + example: + - 0.56 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--minhits" + description: "Minimum number of seed hits required for candidate sites. Higher\ + \ is faster.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--ambiguous" + description: "Set behavior on ambiguously-mapped reads (with multiple top-scoring\ + \ mapping locations).\n * best Use the first best site (Default)\n * toss\ + \ Consider unmapped\n * random Select one top-scoring site randomly\n \ + \ * all Retain all top-scoring sites. Does not work yet with SAM output\n" + info: null + example: + - "best" + required: false + choices: + - "best" + - "toss" + - "random" + - "all" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--ambiguous2" + description: "Set behavior only for reads that map ambiguously to multiple different\ + \ references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\n\ + Ambiguous2 excludes reads that map ambiguously within a single reference.\n\ + \ * best Use the first best site (Default)\n * toss Consider unmapped\n\ + \ * all Write a copy to the output for each reference to which it maps\n\ + \ * split Write a copy to the AMBIGUOUS_ output for each reference to which\ + \ it maps\n" + info: null + example: + - "best" + required: false + choices: + - "best" + - "toss" + - "all" + - "split" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--qtrim" + description: "Quality-trim ends to Q5 before mapping. Options are 'l' (left),\ + \ 'r' (right), and 'lr' (both).\n" + info: null + required: false + choices: + - "l" + - "r" + - "lr" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--untrim" + description: "Undo trimming after mapping. Untrimmed bases will be soft-clipped\ + \ in cigar strings." + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--index" + description: "Location to write the index.\n" + info: null + example: + - "BBSplit_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Output file for read 1.\n" + info: null + example: + - "read_out1.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Output file for read 2.\n" + info: null + example: + - "read_out2.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sam2bam" + alternatives: + - "--bs" + description: "Write a shell script to 'file' that will turn the sam output into\ + \ a sorted, indexed bam file.\n" + info: null + example: + - "script.sh" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--scafstats" + description: "Write statistics on how many reads mapped to which scaffold to this\ + \ file.\n" + info: null + example: + - "scaffold_stats.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refstats" + description: "Write statistics on how many reads were assigned to which reference\ + \ to this file.\nUnmapped reads whose mate mapped to a reference are considered\ + \ assigned and will be counted.\n" + info: null + example: + - "reference_stats.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--nzo" + description: "Only print lines with nonzero coverage." + info: null + direction: "input" + - type: "string" + name: "--bbmap_args" + description: "Additional arguments from BBMap to pass to BBSplit.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Split sequencing reads by mapping them to multiple references simultaneously." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "BBTools Copyright (c) 2014" +links: + repository: "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh" + homepage: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/" + documentation: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\ + \ tar && \\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz\ + \ && \\\ntar xzf BBMap_39.01.tar.gz && \\\ncp -r bbmap/* /usr/local/bin\n" + - type: "docker" + run: + - "bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \"BBMAP:\", $NF}' >\ + \ /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bbmap/bbmap_bbsplit/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/bbmap/bbmap_bbsplit" + executable: "target/nextflow/bbmap/bbmap_bbsplit/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf new file mode 100644 index 0000000..f11940c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf @@ -0,0 +1,4130 @@ +// bbmap_bbsplit v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "bbmap_bbsplit", + "namespace" : "bbmap", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "Sample ID", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "Paired fastq files or not?", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Input fastq files, either one or two (paired), separated by \\";\\".", + "example" : [ + "reads.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ref", + "description" : "Reference FASTA files, separated by \\";\\". The primary reference should be specified first.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--only_build_index", + "description" : "If set, only builds the index. Otherwise, mapping is performed.", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--build", + "description" : "Index to be used for mapping. \n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--qin", + "description" : "Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if\nnot specified.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--interleaved", + "description" : "True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--maxindel", + "description" : "Don't look for indels longer than this. Lower is faster. Set to >=100k for RNA-seq.\n", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--minratio", + "description" : "Fraction of max alignment score required to keep a site. Higher is faster.\n", + "example" : [ + 0.56 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--minhits", + "description" : "Minimum number of seed hits required for candidate sites. Higher is faster.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--ambiguous", + "description" : "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. Does not work yet with SAM output\n", + "example" : [ + "best" + ], + "required" : false, + "choices" : [ + "best", + "toss", + "random", + "all" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--ambiguous2", + "description" : "Set behavior only for reads that map ambiguously to multiple different references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n", + "example" : [ + "best" + ], + "required" : false, + "choices" : [ + "best", + "toss", + "all", + "split" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--qtrim", + "description" : "Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r' (right), and 'lr' (both).\n", + "required" : false, + "choices" : [ + "l", + "r", + "lr" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--untrim", + "description" : "Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings.", + "direction" : "input" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--index", + "description" : "Location to write the index.\n", + "example" : [ + "BBSplit_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_1", + "description" : "Output file for read 1.\n", + "example" : [ + "read_out1.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_2", + "description" : "Output file for read 2.\n", + "example" : [ + "read_out2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sam2bam", + "alternatives" : [ + "--bs" + ], + "description" : "Write a shell script to 'file' that will turn the sam output into a sorted, indexed bam file.\n", + "example" : [ + "script.sh" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--scafstats", + "description" : "Write statistics on how many reads mapped to which scaffold to this file.\n", + "example" : [ + "scaffold_stats.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--refstats", + "description" : "Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n", + "example" : [ + "reference_stats.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--nzo", + "description" : "Only print lines with nonzero coverage.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--bbmap_args", + "description" : "Additional arguments from BBMap to pass to BBSplit.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Split sequencing reads by mapping them to multiple references simultaneously.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "BBTools Copyright (c) 2014", + "links" : { + "repository" : "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh", + "homepage" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/", + "documentation" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y build-essential openjdk-17-jdk wget tar && \\\\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \\\\\ntar xzf BBMap_39.01.tar.gz && \\\\\ncp -r bbmap/* /usr/local/bin\n" + ] + }, + { + "type" : "docker", + "run" : [ + "bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \\"BBMAP:\\", $NF}' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/bbmap/bbmap_bbsplit/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/bbmap/bbmap_bbsplit", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REF+x} ]; then echo "${VIASH_PAR_REF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref='&'#" ; else echo "# par_ref="; fi ) +$( if [ ! -z ${VIASH_PAR_ONLY_BUILD_INDEX+x} ]; then echo "${VIASH_PAR_ONLY_BUILD_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_only_build_index='&'#" ; else echo "# par_only_build_index="; fi ) +$( if [ ! -z ${VIASH_PAR_BUILD+x} ]; then echo "${VIASH_PAR_BUILD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_build='&'#" ; else echo "# par_build="; fi ) +$( if [ ! -z ${VIASH_PAR_QIN+x} ]; then echo "${VIASH_PAR_QIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qin='&'#" ; else echo "# par_qin="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERLEAVED+x} ]; then echo "${VIASH_PAR_INTERLEAVED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interleaved='&'#" ; else echo "# par_interleaved="; fi ) +$( if [ ! -z ${VIASH_PAR_MAXINDEL+x} ]; then echo "${VIASH_PAR_MAXINDEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_maxindel='&'#" ; else echo "# par_maxindel="; fi ) +$( if [ ! -z ${VIASH_PAR_MINRATIO+x} ]; then echo "${VIASH_PAR_MINRATIO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minratio='&'#" ; else echo "# par_minratio="; fi ) +$( if [ ! -z ${VIASH_PAR_MINHITS+x} ]; then echo "${VIASH_PAR_MINHITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minhits='&'#" ; else echo "# par_minhits="; fi ) +$( if [ ! -z ${VIASH_PAR_AMBIGUOUS+x} ]; then echo "${VIASH_PAR_AMBIGUOUS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous='&'#" ; else echo "# par_ambiguous="; fi ) +$( if [ ! -z ${VIASH_PAR_AMBIGUOUS2+x} ]; then echo "${VIASH_PAR_AMBIGUOUS2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous2='&'#" ; else echo "# par_ambiguous2="; fi ) +$( if [ ! -z ${VIASH_PAR_QTRIM+x} ]; then echo "${VIASH_PAR_QTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qtrim='&'#" ; else echo "# par_qtrim="; fi ) +$( if [ ! -z ${VIASH_PAR_UNTRIM+x} ]; then echo "${VIASH_PAR_UNTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_untrim='&'#" ; else echo "# par_untrim="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) +$( if [ ! -z ${VIASH_PAR_SAM2BAM+x} ]; then echo "${VIASH_PAR_SAM2BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam2bam='&'#" ; else echo "# par_sam2bam="; fi ) +$( if [ ! -z ${VIASH_PAR_SCAFSTATS+x} ]; then echo "${VIASH_PAR_SCAFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scafstats='&'#" ; else echo "# par_scafstats="; fi ) +$( if [ ! -z ${VIASH_PAR_REFSTATS+x} ]; then echo "${VIASH_PAR_REFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refstats='&'#" ; else echo "# par_refstats="; fi ) +$( if [ ! -z ${VIASH_PAR_NZO+x} ]; then echo "${VIASH_PAR_NZO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nzo='&'#" ; else echo "# par_nzo="; fi ) +$( if [ ! -z ${VIASH_PAR_BBMAP_ARGS+x} ]; then echo "${VIASH_PAR_BBMAP_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bbmap_args='&'#" ; else echo "# par_bbmap_args="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +unset_if_false=( par_paired par_only_build_index par_interleaved par_untrim par_nzo) + +for var in "\\${unset_if_false[@]}"; do + if [ -z "\\${!var}" ]; then + unset \\$var + fi +done + +if [ ! -d "\\$par_build" ]; then + IFS=";" read -ra ref_files <<< "\\$par_ref" + primary_ref="\\${ref_files[0]}" + refs=() + for file in "\\${ref_files[@]:1}" + do + name=\\$(basename "\\$file" | sed 's/\\\\.[^.]*\\$//') + refs+=("ref_\\$name=\\$file") + done +fi + +if \\$par_only_build_index; then + if [ "\\${#refs[@]}" -gt 1 ]; then + bbsplit.sh \\\\ + --ref_primary="\\$primary_ref" \\\\ + "\\${refs[@]}" \\\\ + path=\\$par_index + else + echo "ERROR: Please specify at least two reference fasta files." + fi +else + IFS=";" read -ra input <<< "\\$par_input" + tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXXXX") + index_files='' + if [ -d "\\$par_build" ]; then + index_files="path=\\$par_build" + elif [ \\${#refs[@]} -gt 0 ]; then + index_files="--ref_primary=\\$primary_ref \\${refs[*]}" + else + echo "ERROR: Please either specify a BBSplit index as input or at least two reference fasta files." + fi + + extra_args="" + if [ -f "\\$par_refstats" ]; then extra_args+=" --refstats \\$par_refstats"; fi + if [ -n "\\$par_ambiguous" ]; then extra_args+=" --ambiguous \\$par_ambiguous"; fi + if [ -n "\\$par_ambiguous2" ]; then extra_args+=" --ambiguous2 \\$par_ambiguous2"; fi + if [ -n "\\$par_minratio" ]; then extra_args+=" --minratio \\$par_minratio"; fi + if [ -n "\\$par_minhits" ]; then extra_args+=" --minhits \\$par_minhits"; fi + if [ -n "\\$par_maxindel" ]; then extra_args+=" --maxindel \\$par_maxindel"; fi + if [ -n "\\$par_qin" ]; then extra_args+=" --qin \\$par_qin"; fi + if [ -n "\\$par_qtrim" ]; then extra_args+=" --qtrim \\$par_qtrim"; fi + if [ "\\$par_interleaved" = true ]; then extra_args+=" --interleaved"; fi + if [ "\\$par_untrim" = true ]; then extra_args+=" --untrim"; fi + if [ "\\$par_nzo" = true ]; then extra_args+=" --nzo"; fi + + if [ -n "\\$par_bbmap_args" ]; then extra_args+=" \\$par_bbmap_args"; fi + + + if \\$par_paired; then + bbsplit.sh \\\\ + \\$index_files \\\\ + in=\\${input[0]} \\\\ + in2=\\${input[1]} \\\\ + basename=\\${tmpdir}/%_#.fastq \\\\ + \\$extra_args + read1=\\$(find \\$tmpdir/ -iname primary_1*) + read2=\\$(find \\$tmpdir/ -iname primary_2*) + cp \\$read1 \\$par_fastq_1 + cp \\$read2 \\$par_fastq_2 + else + bbsplit.sh \\\\ + \\$index_files \\\\ + in=\\${input[0]} \\\\ + basename=\\${tmpdir}/%.fastq \\\\ + \\$extra_args + read1=\\$(find \\$tmpdir/ -iname primary*) + cp \\$read1 \\$par_fastq_1 + fi +fi + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/bbmap/bbmap_bbsplit", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow.config new file mode 100644 index 0000000..a55322e --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'bbmap/bbmap_bbsplit' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Split sequencing reads by mapping them to multiple references simultaneously.' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json new file mode 100644 index 0000000..7b8b1ed --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json @@ -0,0 +1,321 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bbmap_bbsplit", +"description": "Split sequencing reads by mapping them to multiple references simultaneously.", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`. Sample ID", + "help_text": "Type: `string`. Sample ID" + + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Paired fastq files or not?", + "help_text": "Type: `boolean_true`, default: `false`. Paired fastq files or not?" + , + "default":false + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. Input fastq files, either one or two (paired), separated by \";\"", + "help_text": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. Input fastq files, either one or two (paired), separated by \";\"." + + } + + + , + "ref": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\"", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\". The primary reference should be specified first." + + } + + + , + "only_build_index": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If set, only builds the index", + "help_text": "Type: `boolean_true`, default: `false`. If set, only builds the index. Otherwise, mapping is performed." + , + "default":false + } + + + , + "build": { + "type": + "string", + "description": "Type: `file`. Index to be used for mapping", + "help_text": "Type: `file`. Index to be used for mapping. \n" + + } + + + , + "qin": { + "type": + "string", + "description": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset", + "help_text": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if\nnot specified.\n" + + } + + + , + "interleaved": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping", + "help_text": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n" + , + "default":false + } + + + , + "maxindel": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this", + "help_text": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this. Lower is faster. Set to \u003e=100k for RNA-seq.\n" + + } + + + , + "minratio": { + "type": + "number", + "description": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site", + "help_text": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site. Higher is faster.\n" + + } + + + , + "minhits": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites", + "help_text": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites. Higher is faster.\n" + + } + + + , + "ambiguous": { + "type": + "string", + "description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)", + "help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. Does not work yet with SAM output\n", + "enum": ["best", "toss", "random", "all"] + + + } + + + , + "ambiguous2": { + "type": + "string", + "description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. Set behavior only for reads that map ambiguously to multiple different references", + "help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. Set behavior only for reads that map ambiguously to multiple different references.\nNormal \u0027ambiguous=\u0027 controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n", + "enum": ["best", "toss", "all", "split"] + + + } + + + , + "qtrim": { + "type": + "string", + "description": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping", + "help_text": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping. Options are \u0027l\u0027 (left), \u0027r\u0027 (right), and \u0027lr\u0027 (both).\n", + "enum": ["l", "r", "lr"] + + + } + + + , + "untrim": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Undo trimming after mapping", + "help_text": "Type: `boolean_true`, default: `false`. Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings." + , + "default":false + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.index`, example: `BBSplit_index`. Location to write the index", + "help_text": "Type: `file`, default: `$id.$key.index`, example: `BBSplit_index`. Location to write the index.\n" + , + "default":"$id.$key.index" + } + + + , + "fastq_1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. Output file for read 1", + "help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. Output file for read 1.\n" + , + "default":"$id.$key.fastq_1.fastq" + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2.\n" + , + "default":"$id.$key.fastq_2.fastq" + } + + + , + "sam2bam": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file", + "help_text": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file.\n" + , + "default":"$id.$key.sam2bam.sh" + } + + + , + "scafstats": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file", + "help_text": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file.\n" + , + "default":"$id.$key.scafstats.txt" + } + + + , + "refstats": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file", + "help_text": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n" + , + "default":"$id.$key.refstats.txt" + } + + + , + "nzo": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Only print lines with nonzero coverage", + "help_text": "Type: `boolean_true`, default: `false`. Only print lines with nonzero coverage." + , + "default":false + } + + + , + "bbmap_args": { + "type": + "string", + "description": "Type: `string`. Additional arguments from BBMap to pass to BBSplit", + "help_text": "Type: `string`. Additional arguments from BBMap to pass to BBSplit.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml new file mode 100644 index 0000000..02feaed --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml @@ -0,0 +1,378 @@ +name: "bedtools_genomecov" +namespace: "bedtools" +version: "v0.3.1" +authors: +- name: "Theodoro Gasperin Terra Camargo" + roles: + - "author" + - "maintainer" + info: + links: + email: "theodorogtc@gmail.com" + github: "tgaspe" + linkedin: "theodoro-gasperin-terra-camargo" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "The input file (BED/GFF/VCF) to be used.\n" + info: null + example: + - "input.bed" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_bam" + alternatives: + - "-ibam" + description: "The input file is in BAM format.\nNote: BAM _must_ be sorted by\ + \ positions.\n'--genome' option is ignored if you use '--input_bam' option!\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome" + alternatives: + - "-g" + description: "The genome file to be used.\n" + info: null + example: + - "genome.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The output BED file. \n" + info: null + example: + - "output.bed" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "boolean_true" + name: "--depth" + alternatives: + - "-d" + description: "Report the depth at each genome position (with one-based coordinates).\n\ + Default behavior is to report a histogram.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--depth_zero" + alternatives: + - "-dz" + description: "Report the depth at each genome position (with zero-based coordinates).\n\ + Reports only non-zero positions.\nDefault behavior is to report a histogram.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--bed_graph" + alternatives: + - "-bg" + description: "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--bed_graph_zero_coverage" + alternatives: + - "-bga" + description: "Report depth in BedGraph format, as above (-bg).\nHowever with this\ + \ option, regions with zero \ncoverage are also reported. This allows one to\n\ + quickly extract all regions of a genome with 0 \ncoverage by applying: \"grep\ + \ -w 0$\" to the output.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--split" + description: "Treat \"split\" BAM or BED12 entries as distinct BED intervals.\n\ + when computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\"\ + \ operations \nto infer the blocks for computing coverage.\nFor BED12 files,\ + \ this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns\ + \ 10,11,12).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--ignore_deletion" + alternatives: + - "-ignoreD" + description: "Ignore local deletions (CIGAR \"D\" operations) in BAM entries\n\ + when computing coverage.\n" + info: null + direction: "input" + - type: "string" + name: "--strand" + description: "Calculate coverage of intervals from a specific strand.\nWith BED\ + \ files, requires at least 6 columns (strand is column 6). \n" + info: null + required: false + choices: + - "+" + - "-" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--pair_end_coverage" + alternatives: + - "-pc" + description: "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--fragment_size" + alternatives: + - "-fs" + description: "Force to use provided fragment size instead of read length\nWorks\ + \ for BAM files only\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--du" + description: "Change strand af the mate read (so both reads from the same strand)\ + \ useful for strand specific\nWorks for BAM files only\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--five_prime" + alternatives: + - "-5" + description: "Calculate coverage of 5\" positions (instead of entire interval).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--three_prime" + alternatives: + - "-3" + description: "Calculate coverage of 3\" positions (instead of entire interval).\n" + info: null + direction: "input" + - type: "integer" + name: "--max" + description: "Combine all positions with a depth >= max into\na single bin in\ + \ the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n" + info: null + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--scale" + description: "Scale the coverage by a constant factor.\nEach coverage value is\ + \ multiplied by this factor before being reported.\nUseful for normalizing coverage\ + \ by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n" + info: null + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--trackline" + description: "Adds a UCSC/Genome-Browser track line definition in the first line\ + \ of the output.\n- See here for more details about track line definition:\n\ + \ http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding\ + \ a trackline definition, the output BedGraph can be easily\n uploaded\ + \ to the Genome Browser as a custom track,\n BUT CAN NOT be converted into\ + \ a BigWig file (w/o removing the first line).\n" + info: null + direction: "input" + - type: "string" + name: "--trackopts" + description: "Writes additional track line definition parameters in the first\ + \ line.\n- Example:\n -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'\n\ + \ Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Compute the coverage of a feature file among a genome.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "genome coverage" +- "BED" +- "GFF" +- "VCF" +- "BAM" +license: "MIT" +references: + doi: + - "10.1093/bioinformatics/btq033" +links: + repository: "https://github.com/arq5x/bedtools2" + homepage: "https://bedtools.readthedocs.io/en/latest/#" + documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html" + issue_tracker: "https://github.com/arq5x/bedtools2/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "bedtools" + - "procps" + interactive: false + - type: "docker" + run: + - "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\ + \ > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bedtools/bedtools_genomecov/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/bedtools/bedtools_genomecov" + executable: "target/nextflow/bedtools/bedtools_genomecov/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/main.nf new file mode 100644 index 0000000..9ddcebe --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/main.nf @@ -0,0 +1,4078 @@ +// bedtools_genomecov v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Theodoro Gasperin Terra Camargo (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "bedtools_genomecov", + "namespace" : "bedtools", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Theodoro Gasperin Terra Camargo", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "theodorogtc@gmail.com", + "github" : "tgaspe", + "linkedin" : "theodoro-gasperin-terra-camargo" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "The input file (BED/GFF/VCF) to be used.\n", + "example" : [ + "input.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_bam", + "alternatives" : [ + "-ibam" + ], + "description" : "The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n'--genome' option is ignored if you use '--input_bam' option!\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome", + "alternatives" : [ + "-g" + ], + "description" : "The genome file to be used.\n", + "example" : [ + "genome.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "The output BED file. \n", + "example" : [ + "output.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--depth", + "alternatives" : [ + "-d" + ], + "description" : "Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--depth_zero", + "alternatives" : [ + "-dz" + ], + "description" : "Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--bed_graph", + "alternatives" : [ + "-bg" + ], + "description" : "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--bed_graph_zero_coverage", + "alternatives" : [ + "-bga" + ], + "description" : "Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \\"grep -w 0$\\" to the output.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--split", + "description" : "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals.\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \\"N\\" and \\"D\\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--ignore_deletion", + "alternatives" : [ + "-ignoreD" + ], + "description" : "Ignore local deletions (CIGAR \\"D\\" operations) in BAM entries\nwhen computing coverage.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--strand", + "description" : "Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). \n", + "required" : false, + "choices" : [ + "+", + "-" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--pair_end_coverage", + "alternatives" : [ + "-pc" + ], + "description" : "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fragment_size", + "alternatives" : [ + "-fs" + ], + "description" : "Force to use provided fragment size instead of read length\nWorks for BAM files only\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--du", + "description" : "Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--five_prime", + "alternatives" : [ + "-5" + ], + "description" : "Calculate coverage of 5\\" positions (instead of entire interval).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--three_prime", + "alternatives" : [ + "-3" + ], + "description" : "Calculate coverage of 3\\" positions (instead of entire interval).\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--max", + "description" : "Combine all positions with a depth >= max into\na single bin in the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n", + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--scale", + "description" : "Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n", + "required" : false, + "min" : 0.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--trackline", + "description" : "Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--trackopts", + "description" : "Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts 'name=\\"My Track\\" visibility=2 color=255,30,30'\n Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Compute the coverage of a feature file among a genome.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "genome coverage", + "BED", + "GFF", + "VCF", + "BAM" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btq033" + ] + }, + "links" : { + "repository" : "https://github.com/arq5x/bedtools2", + "homepage" : "https://bedtools.readthedocs.io/en/latest/#", + "documentation" : "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html", + "issue_tracker" : "https://github.com/arq5x/bedtools2/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:stable-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "bedtools", + "procps" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "echo \\"bedtools: \\\\\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\\\\"\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/bedtools/bedtools_genomecov/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/bedtools/bedtools_genomecov", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_BAM+x} ]; then echo "${VIASH_PAR_INPUT_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_bam='&'#" ; else echo "# par_input_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME+x} ]; then echo "${VIASH_PAR_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome='&'#" ; else echo "# par_genome="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_DEPTH+x} ]; then echo "${VIASH_PAR_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth='&'#" ; else echo "# par_depth="; fi ) +$( if [ ! -z ${VIASH_PAR_DEPTH_ZERO+x} ]; then echo "${VIASH_PAR_DEPTH_ZERO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth_zero='&'#" ; else echo "# par_depth_zero="; fi ) +$( if [ ! -z ${VIASH_PAR_BED_GRAPH+x} ]; then echo "${VIASH_PAR_BED_GRAPH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph='&'#" ; else echo "# par_bed_graph="; fi ) +$( if [ ! -z ${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE+x} ]; then echo "${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph_zero_coverage='&'#" ; else echo "# par_bed_graph_zero_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT+x} ]; then echo "${VIASH_PAR_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split='&'#" ; else echo "# par_split="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_DELETION+x} ]; then echo "${VIASH_PAR_IGNORE_DELETION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_deletion='&'#" ; else echo "# par_ignore_deletion="; fi ) +$( if [ ! -z ${VIASH_PAR_STRAND+x} ]; then echo "${VIASH_PAR_STRAND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strand='&'#" ; else echo "# par_strand="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIR_END_COVERAGE+x} ]; then echo "${VIASH_PAR_PAIR_END_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pair_end_coverage='&'#" ; else echo "# par_pair_end_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_SIZE+x} ]; then echo "${VIASH_PAR_FRAGMENT_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_size='&'#" ; else echo "# par_fragment_size="; fi ) +$( if [ ! -z ${VIASH_PAR_DU+x} ]; then echo "${VIASH_PAR_DU}" | sed "s#'#'\\"'\\"'#g;s#.*#par_du='&'#" ; else echo "# par_du="; fi ) +$( if [ ! -z ${VIASH_PAR_FIVE_PRIME+x} ]; then echo "${VIASH_PAR_FIVE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_five_prime='&'#" ; else echo "# par_five_prime="; fi ) +$( if [ ! -z ${VIASH_PAR_THREE_PRIME+x} ]; then echo "${VIASH_PAR_THREE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime='&'#" ; else echo "# par_three_prime="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX+x} ]; then echo "${VIASH_PAR_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max='&'#" ; else echo "# par_max="; fi ) +$( if [ ! -z ${VIASH_PAR_SCALE+x} ]; then echo "${VIASH_PAR_SCALE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scale='&'#" ; else echo "# par_scale="; fi ) +$( if [ ! -z ${VIASH_PAR_TRACKLINE+x} ]; then echo "${VIASH_PAR_TRACKLINE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackline='&'#" ; else echo "# par_trackline="; fi ) +$( if [ ! -z ${VIASH_PAR_TRACKOPTS+x} ]; then echo "${VIASH_PAR_TRACKOPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackopts='&'#" ; else echo "# par_trackopts="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +# Exit on error +set -eo pipefail + +# Unset variables +unset_if_false=( + par_input_bam + par_depth + par_depth_zero + par_bed_graph + par_bed_graph_zero_coverage + par_split + par_ignore_deletion + par_pair_end_coverage + par_fragment_size + par_du + par_five_prime + par_three_prime + par_trackline +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# Create input array +IFS=";" read -ra trackopts <<< \\$par_trackopts + +bedtools genomecov \\\\ + \\${par_depth:+-d} \\\\ + \\${par_depth_zero:+-dz} \\\\ + \\${par_bed_graph:+-bg} \\\\ + \\${par_bed_graph_zero_coverage:+-bga} \\\\ + \\${par_split:+-split} \\\\ + \\${par_ignore_deletion:+-ignoreD} \\\\ + \\${par_du:+-du} \\\\ + \\${par_five_prime:+-5} \\\\ + \\${par_three_prime:+-3} \\\\ + \\${par_trackline:+-trackline} \\\\ + \\${par_strand:+-strand "\\$par_strand"} \\\\ + \\${par_max:+-max "\\$par_max"} \\\\ + \\${par_scale:+-scale "\\$par_scale"} \\\\ + \\${par_trackopts:+-trackopts "\\${trackopts[*]}"} \\\\ + \\${par_input_bam:+-ibam "\\$par_input_bam"} \\\\ + \\${par_input:+-i "\\$par_input"} \\\\ + \\${par_genome:+-g "\\$par_genome"} \\\\ + \\${par_pair_end_coverage:+-pc} \\\\ + \\${par_fragment_size:+-fs} \\\\ + > "\\$par_output" + +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/bedtools/bedtools_genomecov", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow.config new file mode 100644 index 0000000..fb62798 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'bedtools/bedtools_genomecov' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Compute the coverage of a feature file among a genome.\n' + author = 'Theodoro Gasperin Terra Camargo' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow_schema.json new file mode 100644 index 0000000..292d251 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/nextflow_schema.json @@ -0,0 +1,303 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bedtools_genomecov", +"description": "Compute the coverage of a feature file among a genome.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used", + "help_text": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used.\n" + + } + + + , + "input_bam": { + "type": + "string", + "description": "Type: `file`. The input file is in BAM format", + "help_text": "Type: `file`. The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n\u0027--genome\u0027 option is ignored if you use \u0027--input_bam\u0027 option!\n" + + } + + + , + "genome": { + "type": + "string", + "description": "Type: `file`, example: `genome.txt`. The genome file to be used", + "help_text": "Type: `file`, example: `genome.txt`. The genome file to be used.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file", + "help_text": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file. \n" + , + "default":"$id.$key.output.bed" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "depth": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with one-based coordinates)", + "help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n" + , + "default":false + } + + + , + "depth_zero": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates)", + "help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n" + , + "default":false + } + + + , + "bed_graph": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format", + "help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n" + , + "default":false + } + + + , + "bed_graph_zero_coverage": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg)", + "help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \"grep -w 0$\" to the output.\n" + , + "default":false + } + + + , + "split": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Treat \"split\" BAM or BED12 entries as distinct BED intervals", + "help_text": "Type: `boolean_true`, default: `false`. Treat \"split\" BAM or BED12 entries as distinct BED intervals.\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n" + , + "default":false + } + + + , + "ignore_deletion": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage", + "help_text": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage.\n" + , + "default":false + } + + + , + "strand": { + "type": + "string", + "description": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand", + "help_text": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). \n", + "enum": ["+", "-"] + + + } + + + , + "pair_end_coverage": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments", + "help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments.\nWorks for BAM files only\n" + , + "default":false + } + + + , + "fragment_size": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n", + "help_text": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n" + , + "default":false + } + + + , + "du": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n", + "help_text": "Type: `boolean_true`, default: `false`. Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n" + , + "default":false + } + + + , + "five_prime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\" positions (instead of entire interval)", + "help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\" positions (instead of entire interval).\n" + , + "default":false + } + + + , + "three_prime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\" positions (instead of entire interval)", + "help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\" positions (instead of entire interval).\n" + , + "default":false + } + + + , + "max": { + "type": + "integer", + "description": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram", + "help_text": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n" + + } + + + , + "scale": { + "type": + "number", + "description": "Type: `double`. Scale the coverage by a constant factor", + "help_text": "Type: `double`. Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n" + + } + + + , + "trackline": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Adds a UCSC/Genome-Browser track line definition in the first line of the output", + "help_text": "Type: `boolean_true`, default: `false`. Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n" + , + "default":false + } + + + , + "trackopts": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts \u0027name=\"My Track\" visibility=2 color=255,30,30\u0027\n Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/.config.vsh.yaml new file mode 100644 index 0000000..ff90438 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/.config.vsh.yaml @@ -0,0 +1,1124 @@ +name: "fastp" +version: "v0.3.1" +authors: +- name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" +argument_groups: +- name: "Inputs" + description: "`fastp` supports both single-end (SE) and paired-end (PE) input.\n\ + \n- for SE data, you only have to specify read1 input by `-i` or `--in1`.\n- for\ + \ PE data, you should also specify read2 input by `-I` or `--in2`.\n" + arguments: + - type: "file" + name: "--in1" + alternatives: + - "-i" + description: "Input FastQ file. Must be single-end or paired-end R1. Can be gzipped." + info: null + example: + - "in.R1.fq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--in2" + alternatives: + - "-I" + description: "Input FastQ file. Must be paired-end R2. Can be gzipped." + info: null + example: + - "in.R2.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + description: "\n- for SE data, you only have to specify read1 output by `-o` or\ + \ `--out1`.\n- for PE data, you should also specify read2 output by `-O` or `--out2`.\n\ + - if you don't specify the output file names, no output files will be written,\ + \ but the QC will still be done for both data before and after filtering.\n- the\ + \ output will be gzip-compressed if its file name ends with `.gz`\n" + arguments: + - type: "file" + name: "--out1" + alternatives: + - "-o" + description: "The single-end or paired-end R1 reads that pass QC. Will be gzipped\ + \ if its file name ends with `.gz`." + info: null + example: + - "out.R1.fq.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--out2" + alternatives: + - "-O" + description: "The paired-end R2 reads that pass QC. Will be gzipped if its file\ + \ name ends with `.gz`." + info: null + example: + - "out.R2.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unpaired1" + description: "Store the reads that `read1` passes filters but its paired `read2`\ + \ doesn't." + info: null + example: + - "unpaired.R1.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unpaired2" + description: "Store the reads that `read2` passes filters but its paired `read1`\ + \ doesn't." + info: null + example: + - "unpaired.R2.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--failed_out" + description: "Store the reads that fail filters.\n\nIf one read failed and is\ + \ written to --failed_out, its failure reason will be appended to its read name.\ + \ For example, failed_quality_filter, failed_too_short etc.\nFor PE data, if\ + \ unpaired reads are not stored (by giving --unpaired1 or --unpaired2), the\ + \ failed pair of reads will be put together. If one read passes the filters\ + \ but its pair doesn't, the failure reason will be paired_read_is_failing.\n" + info: null + example: + - "failed.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--overlapped_out" + description: "For each read pair, output the overlapped region if it has no any\ + \ mismatched base.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Report output arguments" + arguments: + - type: "file" + name: "--json" + alternatives: + - "-j" + description: "The json format report file name\n" + info: null + example: + - "out.json" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--html" + description: "The html format report file name\n" + info: null + example: + - "out.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--report_title" + description: "The title of the html report, default is \"fastp report\".\n" + info: null + example: + - "fastp report" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Adapter trimming" + description: "Adapter trimming is enabled by default, but you can disable it by\ + \ `-A` or `--disable_adapter_trimming`. Adapter sequences can be automatically\ + \ detected for both PE/SE data.\n\n- For SE data, the adapters are evaluated by\ + \ analyzing the tails of first ~1M reads. This evaluation may be inacurrate, and\ + \ you can specify the adapter sequence by `-a` or `--adapter_sequence` option.\ + \ If adapter sequence is specified, the auto detection for SE data will be disabled.\n\ + - For PE data, the adapters can be detected by per-read overlap analysis, which\ + \ seeks for the overlap of each pair of reads. This method is robust and fast,\ + \ so normally you don't have to input the adapter sequence even you know it. But\ + \ you can still specify the adapter sequences for read1 by `--adapter_sequence`,\ + \ and for read2 by `--adapter_sequence_r2`. If `fastp` fails to find an overlap\ + \ (i.e. due to low quality bases), it will use these sequences to trim adapters\ + \ for read1 and read2 respectively.\n- For PE data, the adapter sequence auto-detection\ + \ is disabled by default since the adapters can be trimmed by overlap analysis.\ + \ However, you can specify `--detect_adapter_for_pe` to enable it.\n- For PE data,\ + \ `fastp` will run a little slower if you specify the sequence adapters or enable\ + \ adapter auto-detection, but usually result in a slightly cleaner output, since\ + \ the overlap analysis may fail due to sequencing errors or adapter dimers.\n\ + - The most widely used adapter is the Illumina TruSeq adapters. If your data is\ + \ from the TruSeq library, you can add `--adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA\ + \ --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT` to your command lines,\ + \ or enable auto detection for PE data by specifing `detect_adapter_for_pe`.\n\ + - `fastp` contains some built-in known adapter sequences for better auto-detection.\ + \ If you want to make some adapters to be a part of the built-in adapters, please\ + \ file an issue.\n\nYou can also specify --adapter_fasta to give a FASTA file\ + \ to tell fastp to trim multiple adapters in this FASTA file. Here is a sample\ + \ of such adapter FASTA file:\n\n```\n>Illumina TruSeq Adapter Read 1\nAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n\ + >Illumina TruSeq Adapter Read 2\nAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT\n>polyA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n\ + ```\n\nThe adapter sequence in this file should be at least 6bp long, otherwise\ + \ it will be skipped. And you can give whatever you want to trim, rather than\ + \ regular sequencing adapters (i.e. polyA).\n\n`fastp` first trims the auto-detected\ + \ adapter or the adapter sequences given by `--adapter_sequence | --adapter_sequence_r2`,\ + \ then trims the adapters given by `--adapter_fasta` one by one.\n\nThe sequence\ + \ distribution of trimmed adapters can be found at the HTML/JSON reports.\n" + arguments: + - type: "boolean_true" + name: "--disable_adapter_trimming" + alternatives: + - "-A" + description: "Disable adapter trimming.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--detect_adapter_for_pe" + description: "By default, the auto-detection for adapter is for SE data input\ + \ only, turn on this option to enable it for PE data.\n" + info: null + direction: "input" + - type: "string" + name: "--adapter_sequence" + alternatives: + - "-a" + description: "The adapter sequences to be trimmed. For SE data, if not specified,\ + \ the adapters will be auto-detected. For PE data, this is used if R1/R2 are\ + \ found not overlapped\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--adapter_sequence_r2" + description: "The adapter sequences to be trimmed for R2. This is used for PE\ + \ data if R1/R2 are found overlapped.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--adapter_fasta" + description: "A FASTA file containing all the adapter sequences to be trimmed.\ + \ For SE data, if not specified, the adapters will be auto-detected. For PE\ + \ data, this is used if R1/R2 are found not overlapped.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Base trimming" + arguments: + - type: "integer" + name: "--trim_front1" + alternatives: + - "-f" + description: "Trimming how many bases in front for read1, default is 0.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--trim_tail1" + alternatives: + - "-t" + description: "Trimming how many bases in tail for read1, default is 0.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_len1" + alternatives: + - "-b" + description: "If read1 is longer than max_len1, then trim read1 at its tail to\ + \ make it as long as max_len1. Default 0 means no limitation.\n" + info: null + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--trim_front2" + alternatives: + - "-F" + description: "Trimming how many bases in front for read2, default is 0.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--trim_tail2" + alternatives: + - "-T" + description: "Trimming how many bases in tail for read2, default is 0.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_len2" + alternatives: + - "-B" + description: "If read2 is longer than max_len2, then trim read2 at its tail to\ + \ make it as long as max_len2. Default 0 means no limitation.\n" + info: null + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Merging mode" + description: "Allows merging paired-end reads into a single longer read if they\ + \ are overlapping." + arguments: + - type: "boolean_true" + name: "--merge" + alternatives: + - "-m" + description: "For paired-end input, merge each pair of reads into a single read\ + \ if they are overlapped. The merged reads will be written to the file given\ + \ by --merged_out, the unmerged reads will be written to the files specified\ + \ by --out1 and --out2. The merging mode is disabled by default.\n" + info: null + direction: "input" + - type: "file" + name: "--merged_out" + description: "In the merging mode, specify the file name to store merged output,\ + \ or specify --stdout to stream the merged output.\n" + info: null + example: + - "merged.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--include_unmerged" + description: "In the merging mode, write the unmerged or unpaired reads to the\ + \ file specified by --merge. Disabled by default.\n" + info: null + direction: "input" +- name: "Additional input arguments" + description: "Affects how the input is read." + arguments: + - type: "boolean_true" + name: "--interleaved_in" + description: "Indicate that is an interleaved FASTQ which contains both\ + \ read1 and read2. Disabled by default.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--fix_mgi_id" + description: "The MGI FASTQ ID format is not compatible with many BAM operation\ + \ tools, enable this option to fix it.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--phred64" + alternatives: + - "-6" + description: "Indicate the input is using phred64 scoring (it'll be converted\ + \ to phred33, so the output will still be phred33)\n" + info: null + direction: "input" +- name: "Additional output arguments" + description: "Affects how the output is written." + arguments: + - type: "integer" + name: "--compression" + alternatives: + - "-z" + description: "Compression level for gzip output (1 ~ 9). 1 is fastest, 9 is smallest,\ + \ default is 4.\n" + info: null + example: + - 4 + required: false + min: 1 + max: 9 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--dont_overwrite" + description: "Don't overwrite existing files. Overwritting is allowed by default.\n" + info: null + direction: "input" +- name: "Logging arguments" + arguments: + - type: "boolean_true" + name: "--verbose" + alternatives: + - "-V" + description: "Output verbose log information (i.e. when every 1M reads are processed)." + info: null + direction: "input" +- name: "Processing arguments" + arguments: + - type: "long" + name: "--reads_to_process" + description: "Specify how many reads/pairs to be processed. Default 0 means process\ + \ all reads.\n" + info: null + example: + - 1000000 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Deduplication arguments" + arguments: + - type: "boolean_true" + name: "--dedup" + description: "Enable deduplication to drop the duplicated reads/pairs\n" + info: null + direction: "input" + - type: "integer" + name: "--dup_calc_accuracy" + description: "Accuracy level to calculate duplication (1~6). Higher level uses\ + \ more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3\ + \ for dedup mode.\n" + info: null + example: + - 3 + required: false + min: 1 + max: 6 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--dont_eval_duplication" + description: "Don't evaluate duplication rate to save time and use less memory.\n" + info: null + direction: "input" +- name: "PolyG tail trimming arguments" + arguments: + - type: "boolean_true" + name: "--trim_poly_g" + alternatives: + - "-g" + description: "Force polyG tail trimming, by default trimming is automatically\ + \ enabled for Illumina NextSeq/NovaSeq data\n" + info: null + direction: "input" + - type: "integer" + name: "--poly_g_min_len" + description: "The minimum length to detect polyG in the read tail. 10 by default.\n" + info: null + example: + - 10 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--disable_trim_poly_g" + alternatives: + - "-G" + description: "Disable polyG tail trimming, by default trimming is automatically\ + \ enabled for Illumina NextSeq/NovaSeq data\n" + info: null + direction: "input" +- name: "PolyX tail trimming arguments" + arguments: + - type: "boolean_true" + name: "--trim_poly_x" + alternatives: + - "-x" + description: "Enable polyX trimming in 3' ends.\n" + info: null + direction: "input" + - type: "integer" + name: "--poly_x_min_len" + description: "The minimum length to detect polyX in the read tail. 10 by default.\n" + info: null + example: + - 10 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Cut arguments" + arguments: + - type: "integer" + name: "--cut_front" + alternatives: + - "-5" + description: "Move a sliding window from front (5') to tail, drop the bases in\ + \ the window if its mean quality < threshold, stop otherwise.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_tail" + alternatives: + - "-3" + description: "Move a sliding window from tail (3') to front, drop the bases in\ + \ the window if its mean quality < threshold, stop otherwise.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_right" + alternatives: + - "-r" + description: "Move a sliding window from front to tail, if meet one window with\ + \ mean quality < threshold, drop the bases in the window and the right part,\ + \ and then stop.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_window_size" + alternatives: + - "-W" + description: "The window size option shared by cut_front, cut_tail or cut_sliding.\ + \ Range: 1~1000, default: 4.\n" + info: null + example: + - 4 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_mean_quality" + alternatives: + - "-M" + description: "The mean quality requirement option shared by cut_front, cut_tail\ + \ or cut_sliding. Range: 1~36 default: 20 (Q20)\n" + info: null + example: + - 20 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_front_window_size" + description: "The window size option of cut_front, default to cut_window_size\ + \ if not specified.\n" + info: null + example: + - 4 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_front_mean_quality" + description: "The mean quality requirement option of cut_front, default to cut_mean_quality\ + \ if not specified.\n" + info: null + example: + - 20 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_tail_window_size" + description: "The window size option of cut_tail, default to cut_window_size if\ + \ not specified.\n" + info: null + example: + - 4 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_tail_mean_quality" + description: "The mean quality requirement option of cut_tail, default to cut_mean_quality\ + \ if not specified.\n" + info: null + example: + - 20 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_right_window_size" + description: "The window size option of cut_right, default to cut_window_size\ + \ if not specified.\n" + info: null + example: + - 4 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cut_right_mean_quality" + description: "The mean quality requirement option of cut_right, default to cut_mean_quality\ + \ if not specified.\n" + info: null + example: + - 20 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Quality filtering arguments" + arguments: + - type: "boolean_true" + name: "--disable_quality_filtering" + alternatives: + - "-Q" + description: "Quality filtering is enabled by default. If this option is specified,\ + \ quality filtering is disabled.\n" + info: null + direction: "input" + - type: "integer" + name: "--qualified_quality_phred" + alternatives: + - "-q" + description: "The quality value that a base is qualified. Default 15 means phred\ + \ quality >=Q15 is qualified.\n" + info: null + example: + - 15 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--unqualified_percent_limit" + alternatives: + - "-u" + description: "How many percents of bases are allowed to be unqualified (0~100).\ + \ Default 40 means 40%.\n" + info: null + example: + - 40 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--n_base_limit" + alternatives: + - "-n" + description: "If one read's number of N base is >n_base_limit, then this read/pair\ + \ is discarded. Default is 5.\n" + info: null + example: + - 5 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--average_qual" + alternatives: + - "-e" + description: "If one read's average quality score &1 | sed 's# #: \"#;s#$#\"#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/fastp/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/fastp" + executable: "target/nextflow/fastp/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/main.nf new file mode 100644 index 0000000..95a14df --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/main.nf @@ -0,0 +1,5007 @@ +// fastp v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Robrecht Cannoodt (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "fastp", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "description" : "`fastp` supports both single-end (SE) and paired-end (PE) input.\n\n- for SE data, you only have to specify read1 input by `-i` or `--in1`.\n- for PE data, you should also specify read2 input by `-I` or `--in2`.\n", + "arguments" : [ + { + "type" : "file", + "name" : "--in1", + "alternatives" : [ + "-i" + ], + "description" : "Input FastQ file. Must be single-end or paired-end R1. Can be gzipped.", + "example" : [ + "in.R1.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--in2", + "alternatives" : [ + "-I" + ], + "description" : "Input FastQ file. Must be paired-end R2. Can be gzipped.", + "example" : [ + "in.R2.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "description" : "\n- for SE data, you only have to specify read1 output by `-o` or `--out1`.\n- for PE data, you should also specify read2 output by `-O` or `--out2`.\n- if you don't specify the output file names, no output files will be written, but the QC will still be done for both data before and after filtering.\n- the output will be gzip-compressed if its file name ends with `.gz`\n", + "arguments" : [ + { + "type" : "file", + "name" : "--out1", + "alternatives" : [ + "-o" + ], + "description" : "The single-end or paired-end R1 reads that pass QC. Will be gzipped if its file name ends with `.gz`.", + "example" : [ + "out.R1.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--out2", + "alternatives" : [ + "-O" + ], + "description" : "The paired-end R2 reads that pass QC. Will be gzipped if its file name ends with `.gz`.", + "example" : [ + "out.R2.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unpaired1", + "description" : "Store the reads that `read1` passes filters but its paired `read2` doesn't.", + "example" : [ + "unpaired.R1.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unpaired2", + "description" : "Store the reads that `read2` passes filters but its paired `read1` doesn't.", + "example" : [ + "unpaired.R2.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--failed_out", + "description" : "Store the reads that fail filters.\n\nIf one read failed and is written to --failed_out, its failure reason will be appended to its read name. For example, failed_quality_filter, failed_too_short etc.\nFor PE data, if unpaired reads are not stored (by giving --unpaired1 or --unpaired2), the failed pair of reads will be put together. If one read passes the filters but its pair doesn't, the failure reason will be paired_read_is_failing.\n", + "example" : [ + "failed.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--overlapped_out", + "description" : "For each read pair, output the overlapped region if it has no any mismatched base.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Report output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--json", + "alternatives" : [ + "-j" + ], + "description" : "The json format report file name\n", + "example" : [ + "out.json" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--html", + "description" : "The html format report file name\n", + "example" : [ + "out.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--report_title", + "description" : "The title of the html report, default is \\"fastp report\\".\n", + "example" : [ + "fastp report" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Adapter trimming", + "description" : "Adapter trimming is enabled by default, but you can disable it by `-A` or `--disable_adapter_trimming`. Adapter sequences can be automatically detected for both PE/SE data.\n\n- For SE data, the adapters are evaluated by analyzing the tails of first ~1M reads. This evaluation may be inacurrate, and you can specify the adapter sequence by `-a` or `--adapter_sequence` option. If adapter sequence is specified, the auto detection for SE data will be disabled.\n- For PE data, the adapters can be detected by per-read overlap analysis, which seeks for the overlap of each pair of reads. This method is robust and fast, so normally you don't have to input the adapter sequence even you know it. But you can still specify the adapter sequences for read1 by `--adapter_sequence`, and for read2 by `--adapter_sequence_r2`. If `fastp` fails to find an overlap (i.e. due to low quality bases), it will use these sequences to trim adapters for read1 and read2 respectively.\n- For PE data, the adapter sequence auto-detection is disabled by default since the adapters can be trimmed by overlap analysis. However, you can specify `--detect_adapter_for_pe` to enable it.\n- For PE data, `fastp` will run a little slower if you specify the sequence adapters or enable adapter auto-detection, but usually result in a slightly cleaner output, since the overlap analysis may fail due to sequencing errors or adapter dimers.\n- The most widely used adapter is the Illumina TruSeq adapters. If your data is from the TruSeq library, you can add `--adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT` to your command lines, or enable auto detection for PE data by specifing `detect_adapter_for_pe`.\n- `fastp` contains some built-in known adapter sequences for better auto-detection. If you want to make some adapters to be a part of the built-in adapters, please file an issue.\n\nYou can also specify --adapter_fasta to give a FASTA file to tell fastp to trim multiple adapters in this FASTA file. Here is a sample of such adapter FASTA file:\n\n```\n>Illumina TruSeq Adapter Read 1\nAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n>Illumina TruSeq Adapter Read 2\nAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT\n>polyA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n```\n\nThe adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. And you can give whatever you want to trim, rather than regular sequencing adapters (i.e. polyA).\n\n`fastp` first trims the auto-detected adapter or the adapter sequences given by `--adapter_sequence | --adapter_sequence_r2`, then trims the adapters given by `--adapter_fasta` one by one.\n\nThe sequence distribution of trimmed adapters can be found at the HTML/JSON reports.\n", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--disable_adapter_trimming", + "alternatives" : [ + "-A" + ], + "description" : "Disable adapter trimming.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--detect_adapter_for_pe", + "description" : "By default, the auto-detection for adapter is for SE data input only, turn on this option to enable it for PE data.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--adapter_sequence", + "alternatives" : [ + "-a" + ], + "description" : "The adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--adapter_sequence_r2", + "description" : "The adapter sequences to be trimmed for R2. This is used for PE data if R1/R2 are found overlapped.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--adapter_fasta", + "description" : "A FASTA file containing all the adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Base trimming", + "arguments" : [ + { + "type" : "integer", + "name" : "--trim_front1", + "alternatives" : [ + "-f" + ], + "description" : "Trimming how many bases in front for read1, default is 0.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--trim_tail1", + "alternatives" : [ + "-t" + ], + "description" : "Trimming how many bases in tail for read1, default is 0.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_len1", + "alternatives" : [ + "-b" + ], + "description" : "If read1 is longer than max_len1, then trim read1 at its tail to make it as long as max_len1. Default 0 means no limitation.\n", + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--trim_front2", + "alternatives" : [ + "-F" + ], + "description" : "Trimming how many bases in front for read2, default is 0.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--trim_tail2", + "alternatives" : [ + "-T" + ], + "description" : "Trimming how many bases in tail for read2, default is 0.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_len2", + "alternatives" : [ + "-B" + ], + "description" : "If read2 is longer than max_len2, then trim read2 at its tail to make it as long as max_len2. Default 0 means no limitation.\n", + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Merging mode", + "description" : "Allows merging paired-end reads into a single longer read if they are overlapping.", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--merge", + "alternatives" : [ + "-m" + ], + "description" : "For paired-end input, merge each pair of reads into a single read if they are overlapped. The merged reads will be written to the file given by --merged_out, the unmerged reads will be written to the files specified by --out1 and --out2. The merging mode is disabled by default.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--merged_out", + "description" : "In the merging mode, specify the file name to store merged output, or specify --stdout to stream the merged output.\n", + "example" : [ + "merged.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--include_unmerged", + "description" : "In the merging mode, write the unmerged or unpaired reads to the file specified by --merge. Disabled by default.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Additional input arguments", + "description" : "Affects how the input is read.", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--interleaved_in", + "description" : "Indicate that is an interleaved FASTQ which contains both read1 and read2. Disabled by default.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fix_mgi_id", + "description" : "The MGI FASTQ ID format is not compatible with many BAM operation tools, enable this option to fix it.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--phred64", + "alternatives" : [ + "-6" + ], + "description" : "Indicate the input is using phred64 scoring (it'll be converted to phred33, so the output will still be phred33)\n", + "direction" : "input" + } + ] + }, + { + "name" : "Additional output arguments", + "description" : "Affects how the output is written.", + "arguments" : [ + { + "type" : "integer", + "name" : "--compression", + "alternatives" : [ + "-z" + ], + "description" : "Compression level for gzip output (1 ~ 9). 1 is fastest, 9 is smallest, default is 4.\n", + "example" : [ + 4 + ], + "required" : false, + "min" : 1, + "max" : 9, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--dont_overwrite", + "description" : "Don't overwrite existing files. Overwritting is allowed by default.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Logging arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--verbose", + "alternatives" : [ + "-V" + ], + "description" : "Output verbose log information (i.e. when every 1M reads are processed).", + "direction" : "input" + } + ] + }, + { + "name" : "Processing arguments", + "arguments" : [ + { + "type" : "long", + "name" : "--reads_to_process", + "description" : "Specify how many reads/pairs to be processed. Default 0 means process all reads.\n", + "example" : [ + 1000000 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Deduplication arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--dedup", + "description" : "Enable deduplication to drop the duplicated reads/pairs\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--dup_calc_accuracy", + "description" : "Accuracy level to calculate duplication (1~6). Higher level uses more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3 for dedup mode.\n", + "example" : [ + 3 + ], + "required" : false, + "min" : 1, + "max" : 6, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--dont_eval_duplication", + "description" : "Don't evaluate duplication rate to save time and use less memory.\n", + "direction" : "input" + } + ] + }, + { + "name" : "PolyG tail trimming arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--trim_poly_g", + "alternatives" : [ + "-g" + ], + "description" : "Force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--poly_g_min_len", + "description" : "The minimum length to detect polyG in the read tail. 10 by default.\n", + "example" : [ + 10 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--disable_trim_poly_g", + "alternatives" : [ + "-G" + ], + "description" : "Disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n", + "direction" : "input" + } + ] + }, + { + "name" : "PolyX tail trimming arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--trim_poly_x", + "alternatives" : [ + "-x" + ], + "description" : "Enable polyX trimming in 3' ends.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--poly_x_min_len", + "description" : "The minimum length to detect polyX in the read tail. 10 by default.\n", + "example" : [ + 10 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Cut arguments", + "arguments" : [ + { + "type" : "integer", + "name" : "--cut_front", + "alternatives" : [ + "-5" + ], + "description" : "Move a sliding window from front (5') to tail, drop the bases in the window if its mean quality < threshold, stop otherwise.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_tail", + "alternatives" : [ + "-3" + ], + "description" : "Move a sliding window from tail (3') to front, drop the bases in the window if its mean quality < threshold, stop otherwise.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_right", + "alternatives" : [ + "-r" + ], + "description" : "Move a sliding window from front to tail, if meet one window with mean quality < threshold, drop the bases in the window and the right part, and then stop.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_window_size", + "alternatives" : [ + "-W" + ], + "description" : "The window size option shared by cut_front, cut_tail or cut_sliding. Range: 1~1000, default: 4.\n", + "example" : [ + 4 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_mean_quality", + "alternatives" : [ + "-M" + ], + "description" : "The mean quality requirement option shared by cut_front, cut_tail or cut_sliding. Range: 1~36 default: 20 (Q20)\n", + "example" : [ + 20 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_front_window_size", + "description" : "The window size option of cut_front, default to cut_window_size if not specified.\n", + "example" : [ + 4 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_front_mean_quality", + "description" : "The mean quality requirement option of cut_front, default to cut_mean_quality if not specified.\n", + "example" : [ + 20 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_tail_window_size", + "description" : "The window size option of cut_tail, default to cut_window_size if not specified.\n", + "example" : [ + 4 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_tail_mean_quality", + "description" : "The mean quality requirement option of cut_tail, default to cut_mean_quality if not specified.\n", + "example" : [ + 20 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_right_window_size", + "description" : "The window size option of cut_right, default to cut_window_size if not specified.\n", + "example" : [ + 4 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_right_mean_quality", + "description" : "The mean quality requirement option of cut_right, default to cut_mean_quality if not specified.\n", + "example" : [ + 20 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Quality filtering arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--disable_quality_filtering", + "alternatives" : [ + "-Q" + ], + "description" : "Quality filtering is enabled by default. If this option is specified, quality filtering is disabled.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--qualified_quality_phred", + "alternatives" : [ + "-q" + ], + "description" : "The quality value that a base is qualified. Default 15 means phred quality >=Q15 is qualified.\n", + "example" : [ + 15 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--unqualified_percent_limit", + "alternatives" : [ + "-u" + ], + "description" : "How many percents of bases are allowed to be unqualified (0~100). Default 40 means 40%.\n", + "example" : [ + 40 + ], + "required" : false, + "min" : 0, + "max" : 100, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--n_base_limit", + "alternatives" : [ + "-n" + ], + "description" : "If one read's number of N base is >n_base_limit, then this read/pair is discarded. Default is 5.\n", + "example" : [ + 5 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--average_qual", + "alternatives" : [ + "-e" + ], + "description" : "If one read's average quality score &1 | sed 's# #: \\"#;s#$#\\"#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/fastp/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/fastp", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_IN1+x} ]; then echo "${VIASH_PAR_IN1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_in1='&'#" ; else echo "# par_in1="; fi ) +$( if [ ! -z ${VIASH_PAR_IN2+x} ]; then echo "${VIASH_PAR_IN2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_in2='&'#" ; else echo "# par_in2="; fi ) +$( if [ ! -z ${VIASH_PAR_OUT1+x} ]; then echo "${VIASH_PAR_OUT1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_out1='&'#" ; else echo "# par_out1="; fi ) +$( if [ ! -z ${VIASH_PAR_OUT2+x} ]; then echo "${VIASH_PAR_OUT2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_out2='&'#" ; else echo "# par_out2="; fi ) +$( if [ ! -z ${VIASH_PAR_UNPAIRED1+x} ]; then echo "${VIASH_PAR_UNPAIRED1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unpaired1='&'#" ; else echo "# par_unpaired1="; fi ) +$( if [ ! -z ${VIASH_PAR_UNPAIRED2+x} ]; then echo "${VIASH_PAR_UNPAIRED2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unpaired2='&'#" ; else echo "# par_unpaired2="; fi ) +$( if [ ! -z ${VIASH_PAR_FAILED_OUT+x} ]; then echo "${VIASH_PAR_FAILED_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_failed_out='&'#" ; else echo "# par_failed_out="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERLAPPED_OUT+x} ]; then echo "${VIASH_PAR_OVERLAPPED_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overlapped_out='&'#" ; else echo "# par_overlapped_out="; fi ) +$( if [ ! -z ${VIASH_PAR_JSON+x} ]; then echo "${VIASH_PAR_JSON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_json='&'#" ; else echo "# par_json="; fi ) +$( if [ ! -z ${VIASH_PAR_HTML+x} ]; then echo "${VIASH_PAR_HTML}" | sed "s#'#'\\"'\\"'#g;s#.*#par_html='&'#" ; else echo "# par_html="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORT_TITLE+x} ]; then echo "${VIASH_PAR_REPORT_TITLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report_title='&'#" ; else echo "# par_report_title="; fi ) +$( if [ ! -z ${VIASH_PAR_DISABLE_ADAPTER_TRIMMING+x} ]; then echo "${VIASH_PAR_DISABLE_ADAPTER_TRIMMING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_disable_adapter_trimming='&'#" ; else echo "# par_disable_adapter_trimming="; fi ) +$( if [ ! -z ${VIASH_PAR_DETECT_ADAPTER_FOR_PE+x} ]; then echo "${VIASH_PAR_DETECT_ADAPTER_FOR_PE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_detect_adapter_for_pe='&'#" ; else echo "# par_detect_adapter_for_pe="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER_SEQUENCE+x} ]; then echo "${VIASH_PAR_ADAPTER_SEQUENCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter_sequence='&'#" ; else echo "# par_adapter_sequence="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER_SEQUENCE_R2+x} ]; then echo "${VIASH_PAR_ADAPTER_SEQUENCE_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter_sequence_r2='&'#" ; else echo "# par_adapter_sequence_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER_FASTA+x} ]; then echo "${VIASH_PAR_ADAPTER_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter_fasta='&'#" ; else echo "# par_adapter_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_FRONT1+x} ]; then echo "${VIASH_PAR_TRIM_FRONT1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_front1='&'#" ; else echo "# par_trim_front1="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_TAIL1+x} ]; then echo "${VIASH_PAR_TRIM_TAIL1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_tail1='&'#" ; else echo "# par_trim_tail1="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_LEN1+x} ]; then echo "${VIASH_PAR_MAX_LEN1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_len1='&'#" ; else echo "# par_max_len1="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_FRONT2+x} ]; then echo "${VIASH_PAR_TRIM_FRONT2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_front2='&'#" ; else echo "# par_trim_front2="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_TAIL2+x} ]; then echo "${VIASH_PAR_TRIM_TAIL2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_tail2='&'#" ; else echo "# par_trim_tail2="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_LEN2+x} ]; then echo "${VIASH_PAR_MAX_LEN2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_len2='&'#" ; else echo "# par_max_len2="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGE+x} ]; then echo "${VIASH_PAR_MERGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merge='&'#" ; else echo "# par_merge="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_OUT+x} ]; then echo "${VIASH_PAR_MERGED_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merged_out='&'#" ; else echo "# par_merged_out="; fi ) +$( if [ ! -z ${VIASH_PAR_INCLUDE_UNMERGED+x} ]; then echo "${VIASH_PAR_INCLUDE_UNMERGED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_include_unmerged='&'#" ; else echo "# par_include_unmerged="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERLEAVED_IN+x} ]; then echo "${VIASH_PAR_INTERLEAVED_IN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interleaved_in='&'#" ; else echo "# par_interleaved_in="; fi ) +$( if [ ! -z ${VIASH_PAR_FIX_MGI_ID+x} ]; then echo "${VIASH_PAR_FIX_MGI_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fix_mgi_id='&'#" ; else echo "# par_fix_mgi_id="; fi ) +$( if [ ! -z ${VIASH_PAR_PHRED64+x} ]; then echo "${VIASH_PAR_PHRED64}" | sed "s#'#'\\"'\\"'#g;s#.*#par_phred64='&'#" ; else echo "# par_phred64="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "${VIASH_PAR_COMPRESSION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_compression='&'#" ; else echo "# par_compression="; fi ) +$( if [ ! -z ${VIASH_PAR_DONT_OVERWRITE+x} ]; then echo "${VIASH_PAR_DONT_OVERWRITE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dont_overwrite='&'#" ; else echo "# par_dont_overwrite="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_READS_TO_PROCESS+x} ]; then echo "${VIASH_PAR_READS_TO_PROCESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reads_to_process='&'#" ; else echo "# par_reads_to_process="; fi ) +$( if [ ! -z ${VIASH_PAR_DEDUP+x} ]; then echo "${VIASH_PAR_DEDUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dedup='&'#" ; else echo "# par_dedup="; fi ) +$( if [ ! -z ${VIASH_PAR_DUP_CALC_ACCURACY+x} ]; then echo "${VIASH_PAR_DUP_CALC_ACCURACY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dup_calc_accuracy='&'#" ; else echo "# par_dup_calc_accuracy="; fi ) +$( if [ ! -z ${VIASH_PAR_DONT_EVAL_DUPLICATION+x} ]; then echo "${VIASH_PAR_DONT_EVAL_DUPLICATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dont_eval_duplication='&'#" ; else echo "# par_dont_eval_duplication="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_POLY_G+x} ]; then echo "${VIASH_PAR_TRIM_POLY_G}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_poly_g='&'#" ; else echo "# par_trim_poly_g="; fi ) +$( if [ ! -z ${VIASH_PAR_POLY_G_MIN_LEN+x} ]; then echo "${VIASH_PAR_POLY_G_MIN_LEN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_poly_g_min_len='&'#" ; else echo "# par_poly_g_min_len="; fi ) +$( if [ ! -z ${VIASH_PAR_DISABLE_TRIM_POLY_G+x} ]; then echo "${VIASH_PAR_DISABLE_TRIM_POLY_G}" | sed "s#'#'\\"'\\"'#g;s#.*#par_disable_trim_poly_g='&'#" ; else echo "# par_disable_trim_poly_g="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_POLY_X+x} ]; then echo "${VIASH_PAR_TRIM_POLY_X}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_poly_x='&'#" ; else echo "# par_trim_poly_x="; fi ) +$( if [ ! -z ${VIASH_PAR_POLY_X_MIN_LEN+x} ]; then echo "${VIASH_PAR_POLY_X_MIN_LEN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_poly_x_min_len='&'#" ; else echo "# par_poly_x_min_len="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_FRONT+x} ]; then echo "${VIASH_PAR_CUT_FRONT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_front='&'#" ; else echo "# par_cut_front="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_TAIL+x} ]; then echo "${VIASH_PAR_CUT_TAIL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_tail='&'#" ; else echo "# par_cut_tail="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_RIGHT+x} ]; then echo "${VIASH_PAR_CUT_RIGHT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_right='&'#" ; else echo "# par_cut_right="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_WINDOW_SIZE+x} ]; then echo "${VIASH_PAR_CUT_WINDOW_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_window_size='&'#" ; else echo "# par_cut_window_size="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_MEAN_QUALITY+x} ]; then echo "${VIASH_PAR_CUT_MEAN_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_mean_quality='&'#" ; else echo "# par_cut_mean_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_FRONT_WINDOW_SIZE+x} ]; then echo "${VIASH_PAR_CUT_FRONT_WINDOW_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_front_window_size='&'#" ; else echo "# par_cut_front_window_size="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_FRONT_MEAN_QUALITY+x} ]; then echo "${VIASH_PAR_CUT_FRONT_MEAN_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_front_mean_quality='&'#" ; else echo "# par_cut_front_mean_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_TAIL_WINDOW_SIZE+x} ]; then echo "${VIASH_PAR_CUT_TAIL_WINDOW_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_tail_window_size='&'#" ; else echo "# par_cut_tail_window_size="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_TAIL_MEAN_QUALITY+x} ]; then echo "${VIASH_PAR_CUT_TAIL_MEAN_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_tail_mean_quality='&'#" ; else echo "# par_cut_tail_mean_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_RIGHT_WINDOW_SIZE+x} ]; then echo "${VIASH_PAR_CUT_RIGHT_WINDOW_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_right_window_size='&'#" ; else echo "# par_cut_right_window_size="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_RIGHT_MEAN_QUALITY+x} ]; then echo "${VIASH_PAR_CUT_RIGHT_MEAN_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_right_mean_quality='&'#" ; else echo "# par_cut_right_mean_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_DISABLE_QUALITY_FILTERING+x} ]; then echo "${VIASH_PAR_DISABLE_QUALITY_FILTERING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_disable_quality_filtering='&'#" ; else echo "# par_disable_quality_filtering="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALIFIED_QUALITY_PHRED+x} ]; then echo "${VIASH_PAR_QUALIFIED_QUALITY_PHRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qualified_quality_phred='&'#" ; else echo "# par_qualified_quality_phred="; fi ) +$( if [ ! -z ${VIASH_PAR_UNQUALIFIED_PERCENT_LIMIT+x} ]; then echo "${VIASH_PAR_UNQUALIFIED_PERCENT_LIMIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unqualified_percent_limit='&'#" ; else echo "# par_unqualified_percent_limit="; fi ) +$( if [ ! -z ${VIASH_PAR_N_BASE_LIMIT+x} ]; then echo "${VIASH_PAR_N_BASE_LIMIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_n_base_limit='&'#" ; else echo "# par_n_base_limit="; fi ) +$( if [ ! -z ${VIASH_PAR_AVERAGE_QUAL+x} ]; then echo "${VIASH_PAR_AVERAGE_QUAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_average_qual='&'#" ; else echo "# par_average_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_DISABLE_LENGTH_FILTERING+x} ]; then echo "${VIASH_PAR_DISABLE_LENGTH_FILTERING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_disable_length_filtering='&'#" ; else echo "# par_disable_length_filtering="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTH_REQUIRED+x} ]; then echo "${VIASH_PAR_LENGTH_REQUIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_length_required='&'#" ; else echo "# par_length_required="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTH_LIMIT+x} ]; then echo "${VIASH_PAR_LENGTH_LIMIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_length_limit='&'#" ; else echo "# par_length_limit="; fi ) +$( if [ ! -z ${VIASH_PAR_LOW_COMPLEXITY_FILTER+x} ]; then echo "${VIASH_PAR_LOW_COMPLEXITY_FILTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_low_complexity_filter='&'#" ; else echo "# par_low_complexity_filter="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPLEXITY_THRESHOLD+x} ]; then echo "${VIASH_PAR_COMPLEXITY_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_complexity_threshold='&'#" ; else echo "# par_complexity_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTER_BY_INDEX1+x} ]; then echo "${VIASH_PAR_FILTER_BY_INDEX1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filter_by_index1='&'#" ; else echo "# par_filter_by_index1="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTER_BY_INDEX2+x} ]; then echo "${VIASH_PAR_FILTER_BY_INDEX2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filter_by_index2='&'#" ; else echo "# par_filter_by_index2="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTER_BY_INDEX_THRESHOLD+x} ]; then echo "${VIASH_PAR_FILTER_BY_INDEX_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filter_by_index_threshold='&'#" ; else echo "# par_filter_by_index_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_CORRECTION+x} ]; then echo "${VIASH_PAR_CORRECTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_correction='&'#" ; else echo "# par_correction="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERLAP_LEN_REQUIRE+x} ]; then echo "${VIASH_PAR_OVERLAP_LEN_REQUIRE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overlap_len_require='&'#" ; else echo "# par_overlap_len_require="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERLAP_DIFF_LIMIT+x} ]; then echo "${VIASH_PAR_OVERLAP_DIFF_LIMIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overlap_diff_limit='&'#" ; else echo "# par_overlap_diff_limit="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERLAP_DIFF_PERCENT_LIMIT+x} ]; then echo "${VIASH_PAR_OVERLAP_DIFF_PERCENT_LIMIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overlap_diff_percent_limit='&'#" ; else echo "# par_overlap_diff_percent_limit="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI+x} ]; then echo "${VIASH_PAR_UMI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi='&'#" ; else echo "# par_umi="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_LOC+x} ]; then echo "${VIASH_PAR_UMI_LOC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_loc='&'#" ; else echo "# par_umi_loc="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_LEN+x} ]; then echo "${VIASH_PAR_UMI_LEN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_len='&'#" ; else echo "# par_umi_len="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_PREFIX+x} ]; then echo "${VIASH_PAR_UMI_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_prefix='&'#" ; else echo "# par_umi_prefix="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_SKIP+x} ]; then echo "${VIASH_PAR_UMI_SKIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_skip='&'#" ; else echo "# par_umi_skip="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_DELIM+x} ]; then echo "${VIASH_PAR_UMI_DELIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_delim='&'#" ; else echo "# par_umi_delim="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERREPRESENTATION_ANALYSIS+x} ]; then echo "${VIASH_PAR_OVERREPRESENTATION_ANALYSIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overrepresentation_analysis='&'#" ; else echo "# par_overrepresentation_analysis="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERREPRESENTATION_SAMPLING+x} ]; then echo "${VIASH_PAR_OVERREPRESENTATION_SAMPLING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overrepresentation_sampling='&'#" ; else echo "# par_overrepresentation_sampling="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +# disable flags +unset_if_false=( + par_disable_adapter_trimming + par_detect_adapter_for_pe + par_merge + par_include_unmerged + par_interleaved_in + par_fix_mgi_id + par_phred64 + par_dont_overwrite + par_verbose + par_dedup + par_dont_eval_duplication + par_trim_poly_g + par_disable_trim_poly_g + par_trim_poly_x + par_disable_quality_filtering + par_disable_length_filtering + par_low_complexity_filter + par_umi + par_overrepresentation_analysis +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# run command +fastp \\\\ + -i "\\$par_in1" \\\\ + -o "\\$par_out1" \\\\ + \\${par_in2:+--in2 "\\${par_in2}"} \\\\ + \\${par_out2:+--out2 "\\${par_out2}"} \\\\ + \\${par_unpaired1:+--unpaired1 "\\${par_unpaired1}"} \\\\ + \\${par_unpaired2:+--unpaired2 "\\${par_unpaired2}"} \\\\ + \\${par_failed_out:+--failed_out "\\${par_failed_out}"} \\\\ + \\${par_overlapped_out:+--overlapped_out "\\${par_overlapped_out}"} \\\\ + \\${par_json:+--json "\\${par_json}"} \\\\ + \\${par_html:+--html "\\${par_html}"} \\\\ + \\${par_report_title:+--report_title "\\${par_report_title}"} \\\\ + \\${par_disable_adapter_trimming:+--disable_adapter_trimming} \\\\ + \\${par_detect_adapter_for_pe:+--detect_adapter_for_pe} \\\\ + \\${par_adapter_sequence:+--adapter_sequence "\\${par_adapter_sequence}"} \\\\ + \\${par_adapter_sequence_r2:+--adapter_sequence_r2 "\\${par_adapter_sequence_r2}"} \\\\ + \\${par_adapter_fasta:+--adapter_fasta "\\${par_adapter_fasta}"} \\\\ + \\${par_trim_front1:+--trim_front1 "\\${par_trim_front1}"} \\\\ + \\${par_trim_tail1:+--trim_tail1 "\\${par_trim_tail1}"} \\\\ + \\${par_max_len1:+--max_len1 "\\${par_max_len1}"} \\\\ + \\${par_trim_front2:+--trim_front2 "\\${par_trim_front2}"} \\\\ + \\${par_trim_tail2:+--trim_tail2 "\\${par_trim_tail2}"} \\\\ + \\${par_max_len2:+--max_len2 "\\${par_max_len2}"} \\\\ + \\${par_merge:+--merge} \\\\ + \\${par_merged_out:+--merged_out "\\${par_merged_out}"} \\\\ + \\${par_include_unmerged:+--include_unmerged} \\\\ + \\${par_interleaved_in:+--interleaved_in} \\\\ + \\${par_fix_mgi_id:+--fix_mgi_id} \\\\ + \\${par_phred64:+--phred64} \\\\ + \\${par_compression:+--compression "\\${par_compression}"} \\\\ + \\${par_dont_overwrite:+--dont_overwrite} \\\\ + \\${par_verbose:+--verbose} \\\\ + \\${par_reads_to_process:+--reads_to_process "\\${par_reads_to_process}"} \\\\ + \\${par_dedup:+--dedup} \\\\ + \\${par_dup_calc_accuracy:+--dup_calc_accuracy "\\${par_dup_calc_accuracy}"} \\\\ + \\${par_dont_eval_duplication:+--dont_eval_duplication} \\\\ + \\${par_trim_poly_g:+--trim_poly_g} \\\\ + \\${par_poly_g_min_len:+--poly_g_min_len "\\${par_poly_g_min_len}"} \\\\ + \\${par_disable_trim_poly_g:+--disable_trim_poly_g} \\\\ + \\${par_trim_poly_x:+--trim_poly_x} \\\\ + \\${par_poly_x_min_len:+--poly_x_min_len "\\${par_poly_x_min_len}"} \\\\ + \\${par_cut_front:+--cut_front "\\${par_cut_front}"} \\\\ + \\${par_cut_tail:+--cut_tail "\\${par_cut_tail}"} \\\\ + \\${par_cut_right:+--cut_right "\\${par_cut_right}"} \\\\ + \\${par_cut_window_size:+--cut_window_size "\\${par_cut_window_size}"} \\\\ + \\${par_cut_mean_quality:+--cut_mean_quality "\\${par_cut_mean_quality}"} \\\\ + \\${par_cut_front_window_size:+--cut_front_window_size "\\${par_cut_front_window_size}"} \\\\ + \\${par_cut_front_mean_quality:+--cut_front_mean_quality "\\${par_cut_front_mean_quality}"} \\\\ + \\${par_cut_tail_window_size:+--cut_tail_window_size "\\${par_cut_tail_window_size}"} \\\\ + \\${par_cut_tail_mean_quality:+--cut_tail_mean_quality "\\${par_cut_tail_mean_quality}"} \\\\ + \\${par_cut_right_window_size:+--cut_right_window_size "\\${par_cut_right_window_size}"} \\\\ + \\${par_cut_right_mean_quality:+--cut_right_mean_quality "\\${par_cut_right_mean_quality}"} \\\\ + \\${par_disable_quality_filtering:+--disable_quality_filtering} \\\\ + \\${par_qualified_quality_phred:+--qualified_quality_phred "\\${par_qualified_quality_phred}"} \\\\ + \\${par_unqualified_percent_limit:+--unqualified_percent_limit "\\${par_unqualified_percent_limit}"} \\\\ + \\${par_n_base_limit:+--n_base_limit "\\${par_n_base_limit}"} \\\\ + \\${par_average_qual:+--average_qual "\\${par_average_qual}"} \\\\ + \\${par_disable_length_filtering:+--disable_length_filtering} \\\\ + \\${par_length_required:+--length_required "\\${par_length_required}"} \\\\ + \\${par_length_limit:+--length_limit "\\${par_length_limit}"} \\\\ + \\${par_low_complexity_filter:+--low_complexity_filter} \\\\ + \\${par_complexity_threshold:+--complexity_threshold "\\${par_complexity_threshold}"} \\\\ + \\${par_filter_by_index1:+--filter_by_index1 "\\${par_filter_by_index1}"} \\\\ + \\${par_filter_by_index2:+--filter_by_index2 "\\${par_filter_by_index2}"} \\\\ + \\${par_filter_by_index_threshold:+--filter_by_index_threshold "\\${par_filter_by_index_threshold}"} \\\\ + \\${par_correction:+--correction} \\\\ + \\${par_overlap_len_require:+--overlap_len_require "\\${par_overlap_len_require}"} \\\\ + \\${par_overlap_diff_limit:+--overlap_diff_limit "\\${par_overlap_diff_limit}"} \\\\ + \\${par_overlap_diff_percent_limit:+--overlap_diff_percent_limit "\\${par_overlap_diff_percent_limit}"} \\\\ + \\${par_umi:+--umi} \\\\ + \\${par_umi_loc:+--umi_loc "\\${par_umi_loc}"} \\\\ + \\${par_umi_len:+--umi_len "\\${par_umi_len}"} \\\\ + \\${par_umi_prefix:+--umi_prefix "\\${par_umi_prefix}"} \\\\ + \\${par_umi_skip:+--umi_skip "\\${par_umi_skip}"} \\\\ + \\${par_umi_delim:+--umi_delim "\\${par_umi_delim}"} \\\\ + \\${par_overrepresentation_analysis:+--overrepresentation_analysis} \\\\ + \\${par_overrepresentation_sampling:+--overrepresentation_sampling "\\${par_overrepresentation_sampling}"} \\\\ + \\${meta_cpus:+--thread "\\${meta_cpus}"} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/fastp", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow.config new file mode 100644 index 0000000..ddce601 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'fastp' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'An ultra-fast all-in-one FASTQ preprocessor (QC/adapters/trimming/filtering/splitting/merging...).\n\nFeatures:\n\n - comprehensive quality profiling for both before and after filtering data (quality curves, base contents, KMER, Q20/Q30, GC Ratio, duplication, adapter contents...)\n - filter out bad reads (too low quality, too short, or too many N...)\n - cut low quality bases for per read in its 5\' and 3\' by evaluating the mean quality from a sliding window (like Trimmomatic but faster).\n - trim all reads in front and tail\n - cut adapters. Adapter sequences can be automatically detected, which means you don\'t have to input the adapter sequences to trim them.\n - correct mismatched base pairs in overlapped regions of paired end reads, if one base is with high quality while the other is with ultra low quality\n - trim polyG in 3\' ends, which is commonly seen in NovaSeq/NextSeq data. Trim polyX in 3\' ends to remove unwanted polyX tailing (i.e. polyA tailing for mRNA-Seq data)\n - preprocess unique molecular identifier (UMI) enabled data, shift UMI to sequence name.\n - report JSON format result for further interpreting.\n - visualize quality control and filtering results on a single HTML page (like FASTQC but faster and more informative).\n - split the output to multiple files (0001.R1.gz, 0002.R1.gz...) to support parallel processing. Two modes can be used, limiting the total split file number, or limitting the lines of each split file.\n - support long reads (data from PacBio / Nanopore devices).\n - support reading from STDIN and writing to STDOUT\n - support interleaved input\n - support ultra-fast FASTQ-level deduplication\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow_schema.json new file mode 100644 index 0000000..3467000 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/nextflow_schema.json @@ -0,0 +1,1131 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "fastp", +"description": "An ultra-fast all-in-one FASTQ preprocessor (QC/adapters/trimming/filtering/splitting/merging...).\n\nFeatures:\n\n - comprehensive quality profiling for both before and after filtering data (quality curves, base contents, KMER, Q20/Q30, GC Ratio, duplication, adapter contents...)\n - filter out bad reads (too low quality, too short, or too many N...)\n - cut low quality bases for per read in its 5\u0027 and 3\u0027 by evaluating the mean quality from a sliding window (like Trimmomatic but faster).\n - trim all reads in front and tail\n - cut adapters. Adapter sequences can be automatically detected, which means you don\u0027t have to input the adapter sequences to trim them.\n - correct mismatched base pairs in overlapped regions of paired end reads, if one base is with high quality while the other is with ultra low quality\n - trim polyG in 3\u0027 ends, which is commonly seen in NovaSeq/NextSeq data. Trim polyX in 3\u0027 ends to remove unwanted polyX tailing (i.e. polyA tailing for mRNA-Seq data)\n - preprocess unique molecular identifier (UMI) enabled data, shift UMI to sequence name.\n - report JSON format result for further interpreting.\n - visualize quality control and filtering results on a single HTML page (like FASTQC but faster and more informative).\n - split the output to multiple files (0001.R1.gz, 0002.R1.gz...) to support parallel processing. Two modes can be used, limiting the total split file number, or limitting the lines of each split file.\n - support long reads (data from PacBio / Nanopore devices).\n - support reading from STDIN and writing to STDOUT\n - support interleaved input\n - support ultra-fast FASTQ-level deduplication\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "`fastp` supports both single-end (SE) and paired-end (PE) input.\n\n- for SE data, you only have to specify read1 input by `-i` or `--in1`.\n- for PE data, you should also specify read2 input by `-I` or `--in2`.\n", + "properties": { + + + "in1": { + "type": + "string", + "description": "Type: `file`, required, example: `in.R1.fq.gz`. Input FastQ file", + "help_text": "Type: `file`, required, example: `in.R1.fq.gz`. Input FastQ file. Must be single-end or paired-end R1. Can be gzipped." + + } + + + , + "in2": { + "type": + "string", + "description": "Type: `file`, example: `in.R2.fq.gz`. Input FastQ file", + "help_text": "Type: `file`, example: `in.R2.fq.gz`. Input FastQ file. Must be paired-end R2. Can be gzipped." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "\n- for SE data, you only have to specify read1 output by `-o` or `--out1`.\n- for PE data, you should also specify read2 output by `-O` or `--out2`.\n- if you don\u0027t specify the output file names, no output files will be written, but the QC will still be done for both data before and after filtering.\n- the output will be gzip-compressed if its file name ends with `.gz`\n", + "properties": { + + + "out1": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.out1.gz`, example: `out.R1.fq.gz`. The single-end or paired-end R1 reads that pass QC", + "help_text": "Type: `file`, required, default: `$id.$key.out1.gz`, example: `out.R1.fq.gz`. The single-end or paired-end R1 reads that pass QC. Will be gzipped if its file name ends with `.gz`." + , + "default":"$id.$key.out1.gz" + } + + + , + "out2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.out2.gz`, example: `out.R2.fq.gz`. The paired-end R2 reads that pass QC", + "help_text": "Type: `file`, default: `$id.$key.out2.gz`, example: `out.R2.fq.gz`. The paired-end R2 reads that pass QC. Will be gzipped if its file name ends with `.gz`." + , + "default":"$id.$key.out2.gz" + } + + + , + "unpaired1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unpaired1.gz`, example: `unpaired.R1.fq.gz`. Store the reads that `read1` passes filters but its paired `read2` doesn\u0027t", + "help_text": "Type: `file`, default: `$id.$key.unpaired1.gz`, example: `unpaired.R1.fq.gz`. Store the reads that `read1` passes filters but its paired `read2` doesn\u0027t." + , + "default":"$id.$key.unpaired1.gz" + } + + + , + "unpaired2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unpaired2.gz`, example: `unpaired.R2.fq.gz`. Store the reads that `read2` passes filters but its paired `read1` doesn\u0027t", + "help_text": "Type: `file`, default: `$id.$key.unpaired2.gz`, example: `unpaired.R2.fq.gz`. Store the reads that `read2` passes filters but its paired `read1` doesn\u0027t." + , + "default":"$id.$key.unpaired2.gz" + } + + + , + "failed_out": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.failed_out.gz`, example: `failed.fq.gz`. Store the reads that fail filters", + "help_text": "Type: `file`, default: `$id.$key.failed_out.gz`, example: `failed.fq.gz`. Store the reads that fail filters.\n\nIf one read failed and is written to --failed_out, its failure reason will be appended to its read name. For example, failed_quality_filter, failed_too_short etc.\nFor PE data, if unpaired reads are not stored (by giving --unpaired1 or --unpaired2), the failed pair of reads will be put together. If one read passes the filters but its pair doesn\u0027t, the failure reason will be paired_read_is_failing.\n" + , + "default":"$id.$key.failed_out.gz" + } + + + , + "overlapped_out": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.overlapped_out`. For each read pair, output the overlapped region if it has no any mismatched base", + "help_text": "Type: `file`, default: `$id.$key.overlapped_out`. For each read pair, output the overlapped region if it has no any mismatched base.\n" + , + "default":"$id.$key.overlapped_out" + } + + +} +}, + + + "report output arguments" : { + "title": "Report output arguments", + "type": "object", + "description": "No description", + "properties": { + + + "json": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.json.json`, example: `out.json`. The json format report file name\n", + "help_text": "Type: `file`, default: `$id.$key.json.json`, example: `out.json`. The json format report file name\n" + , + "default":"$id.$key.json.json" + } + + + , + "html": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.html.html`, example: `out.html`. The html format report file name\n", + "help_text": "Type: `file`, default: `$id.$key.html.html`, example: `out.html`. The html format report file name\n" + , + "default":"$id.$key.html.html" + } + + + , + "report_title": { + "type": + "string", + "description": "Type: `string`, example: `fastp report`. The title of the html report, default is \"fastp report\"", + "help_text": "Type: `string`, example: `fastp report`. The title of the html report, default is \"fastp report\".\n" + + } + + +} +}, + + + "adapter trimming" : { + "title": "Adapter trimming", + "type": "object", + "description": "Adapter trimming is enabled by default, but you can disable it by `-A` or `--disable_adapter_trimming`. Adapter sequences can be automatically detected for both PE/SE data.\n\n- For SE data, the adapters are evaluated by analyzing the tails of first ~1M reads. This evaluation may be inacurrate, and you can specify the adapter sequence by `-a` or `--adapter_sequence` option. If adapter sequence is specified, the auto detection for SE data will be disabled.\n- For PE data, the adapters can be detected by per-read overlap analysis, which seeks for the overlap of each pair of reads. This method is robust and fast, so normally you don\u0027t have to input the adapter sequence even you know it. But you can still specify the adapter sequences for read1 by `--adapter_sequence`, and for read2 by `--adapter_sequence_r2`. If `fastp` fails to find an overlap (i.e. due to low quality bases), it will use these sequences to trim adapters for read1 and read2 respectively.\n- For PE data, the adapter sequence auto-detection is disabled by default since the adapters can be trimmed by overlap analysis. However, you can specify `--detect_adapter_for_pe` to enable it.\n- For PE data, `fastp` will run a little slower if you specify the sequence adapters or enable adapter auto-detection, but usually result in a slightly cleaner output, since the overlap analysis may fail due to sequencing errors or adapter dimers.\n- The most widely used adapter is the Illumina TruSeq adapters. If your data is from the TruSeq library, you can add `--adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT` to your command lines, or enable auto detection for PE data by specifing `detect_adapter_for_pe`.\n- `fastp` contains some built-in known adapter sequences for better auto-detection. If you want to make some adapters to be a part of the built-in adapters, please file an issue.\n\nYou can also specify --adapter_fasta to give a FASTA file to tell fastp to trim multiple adapters in this FASTA file. Here is a sample of such adapter FASTA file:\n\n```\n\u003eIllumina TruSeq Adapter Read 1\nAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n\u003eIllumina TruSeq Adapter Read 2\nAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT\n\u003epolyA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n```\n\nThe adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. And you can give whatever you want to trim, rather than regular sequencing adapters (i.e. polyA).\n\n`fastp` first trims the auto-detected adapter or the adapter sequences given by `--adapter_sequence | --adapter_sequence_r2`, then trims the adapters given by `--adapter_fasta` one by one.\n\nThe sequence distribution of trimmed adapters can be found at the HTML/JSON reports.\n", + "properties": { + + + "disable_adapter_trimming": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable adapter trimming", + "help_text": "Type: `boolean_true`, default: `false`. Disable adapter trimming.\n" + , + "default":false + } + + + , + "detect_adapter_for_pe": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, the auto-detection for adapter is for SE data input only, turn on this option to enable it for PE data", + "help_text": "Type: `boolean_true`, default: `false`. By default, the auto-detection for adapter is for SE data input only, turn on this option to enable it for PE data.\n" + , + "default":false + } + + + , + "adapter_sequence": { + "type": + "string", + "description": "Type: `string`. The adapter sequences to be trimmed", + "help_text": "Type: `string`. The adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped\n" + + } + + + , + "adapter_sequence_r2": { + "type": + "string", + "description": "Type: `string`. The adapter sequences to be trimmed for R2", + "help_text": "Type: `string`. The adapter sequences to be trimmed for R2. This is used for PE data if R1/R2 are found overlapped.\n" + + } + + + , + "adapter_fasta": { + "type": + "string", + "description": "Type: `file`. A FASTA file containing all the adapter sequences to be trimmed", + "help_text": "Type: `file`. A FASTA file containing all the adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped.\n" + + } + + +} +}, + + + "base trimming" : { + "title": "Base trimming", + "type": "object", + "description": "No description", + "properties": { + + + "trim_front1": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Trimming how many bases in front for read1, default is 0", + "help_text": "Type: `integer`, example: `0`. Trimming how many bases in front for read1, default is 0.\n" + + } + + + , + "trim_tail1": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Trimming how many bases in tail for read1, default is 0", + "help_text": "Type: `integer`, example: `0`. Trimming how many bases in tail for read1, default is 0.\n" + + } + + + , + "max_len1": { + "type": + "integer", + "description": "Type: `integer`. If read1 is longer than max_len1, then trim read1 at its tail to make it as long as max_len1", + "help_text": "Type: `integer`. If read1 is longer than max_len1, then trim read1 at its tail to make it as long as max_len1. Default 0 means no limitation.\n" + + } + + + , + "trim_front2": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Trimming how many bases in front for read2, default is 0", + "help_text": "Type: `integer`, example: `0`. Trimming how many bases in front for read2, default is 0.\n" + + } + + + , + "trim_tail2": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Trimming how many bases in tail for read2, default is 0", + "help_text": "Type: `integer`, example: `0`. Trimming how many bases in tail for read2, default is 0.\n" + + } + + + , + "max_len2": { + "type": + "integer", + "description": "Type: `integer`. If read2 is longer than max_len2, then trim read2 at its tail to make it as long as max_len2", + "help_text": "Type: `integer`. If read2 is longer than max_len2, then trim read2 at its tail to make it as long as max_len2. Default 0 means no limitation.\n" + + } + + +} +}, + + + "merging mode" : { + "title": "Merging mode", + "type": "object", + "description": "Allows merging paired-end reads into a single longer read if they are overlapping.", + "properties": { + + + "merge": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For paired-end input, merge each pair of reads into a single read if they are overlapped", + "help_text": "Type: `boolean_true`, default: `false`. For paired-end input, merge each pair of reads into a single read if they are overlapped. The merged reads will be written to the file given by --merged_out, the unmerged reads will be written to the files specified by --out1 and --out2. The merging mode is disabled by default.\n" + , + "default":false + } + + + , + "merged_out": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.merged_out.gz`, example: `merged.fq.gz`. In the merging mode, specify the file name to store merged output, or specify --stdout to stream the merged output", + "help_text": "Type: `file`, default: `$id.$key.merged_out.gz`, example: `merged.fq.gz`. In the merging mode, specify the file name to store merged output, or specify --stdout to stream the merged output.\n" + , + "default":"$id.$key.merged_out.gz" + } + + + , + "include_unmerged": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. In the merging mode, write the unmerged or unpaired reads to the file specified by --merge", + "help_text": "Type: `boolean_true`, default: `false`. In the merging mode, write the unmerged or unpaired reads to the file specified by --merge. Disabled by default.\n" + , + "default":false + } + + +} +}, + + + "additional input arguments" : { + "title": "Additional input arguments", + "type": "object", + "description": "Affects how the input is read.", + "properties": { + + + "interleaved_in": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Indicate that \u003cin1\u003e is an interleaved FASTQ which contains both read1 and read2", + "help_text": "Type: `boolean_true`, default: `false`. Indicate that \u003cin1\u003e is an interleaved FASTQ which contains both read1 and read2. Disabled by default.\n" + , + "default":false + } + + + , + "fix_mgi_id": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. The MGI FASTQ ID format is not compatible with many BAM operation tools, enable this option to fix it", + "help_text": "Type: `boolean_true`, default: `false`. The MGI FASTQ ID format is not compatible with many BAM operation tools, enable this option to fix it.\n" + , + "default":false + } + + + , + "phred64": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Indicate the input is using phred64 scoring (it\u0027ll be converted to phred33, so the output will still be phred33)\n", + "help_text": "Type: `boolean_true`, default: `false`. Indicate the input is using phred64 scoring (it\u0027ll be converted to phred33, so the output will still be phred33)\n" + , + "default":false + } + + +} +}, + + + "additional output arguments" : { + "title": "Additional output arguments", + "type": "object", + "description": "Affects how the output is written.", + "properties": { + + + "compression": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. Compression level for gzip output (1 ~ 9)", + "help_text": "Type: `integer`, example: `4`. Compression level for gzip output (1 ~ 9). 1 is fastest, 9 is smallest, default is 4.\n" + + } + + + , + "dont_overwrite": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t overwrite existing files", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t overwrite existing files. Overwritting is allowed by default.\n" + , + "default":false + } + + +} +}, + + + "logging arguments" : { + "title": "Logging arguments", + "type": "object", + "description": "No description", + "properties": { + + + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output verbose log information (i", + "help_text": "Type: `boolean_true`, default: `false`. Output verbose log information (i.e. when every 1M reads are processed)." + , + "default":false + } + + +} +}, + + + "processing arguments" : { + "title": "Processing arguments", + "type": "object", + "description": "No description", + "properties": { + + + "reads_to_process": { + "type": + "string", + "description": "Type: `long`, example: `1000000`. Specify how many reads/pairs to be processed", + "help_text": "Type: `long`, example: `1000000`. Specify how many reads/pairs to be processed. Default 0 means process all reads.\n" + + } + + +} +}, + + + "deduplication arguments" : { + "title": "Deduplication arguments", + "type": "object", + "description": "No description", + "properties": { + + + "dedup": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable deduplication to drop the duplicated reads/pairs\n", + "help_text": "Type: `boolean_true`, default: `false`. Enable deduplication to drop the duplicated reads/pairs\n" + , + "default":false + } + + + , + "dup_calc_accuracy": { + "type": + "integer", + "description": "Type: `integer`, example: `3`. Accuracy level to calculate duplication (1~6)", + "help_text": "Type: `integer`, example: `3`. Accuracy level to calculate duplication (1~6). Higher level uses more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3 for dedup mode.\n" + + } + + + , + "dont_eval_duplication": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t evaluate duplication rate to save time and use less memory", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t evaluate duplication rate to save time and use less memory.\n" + , + "default":false + } + + +} +}, + + + "polyg tail trimming arguments" : { + "title": "PolyG tail trimming arguments", + "type": "object", + "description": "No description", + "properties": { + + + "trim_poly_g": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n", + "help_text": "Type: `boolean_true`, default: `false`. Force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n" + , + "default":false + } + + + , + "poly_g_min_len": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. The minimum length to detect polyG in the read tail", + "help_text": "Type: `integer`, example: `10`. The minimum length to detect polyG in the read tail. 10 by default.\n" + + } + + + , + "disable_trim_poly_g": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n", + "help_text": "Type: `boolean_true`, default: `false`. Disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n" + , + "default":false + } + + +} +}, + + + "polyx tail trimming arguments" : { + "title": "PolyX tail trimming arguments", + "type": "object", + "description": "No description", + "properties": { + + + "trim_poly_x": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable polyX trimming in 3\u0027 ends", + "help_text": "Type: `boolean_true`, default: `false`. Enable polyX trimming in 3\u0027 ends.\n" + , + "default":false + } + + + , + "poly_x_min_len": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. The minimum length to detect polyX in the read tail", + "help_text": "Type: `integer`, example: `10`. The minimum length to detect polyX in the read tail. 10 by default.\n" + + } + + +} +}, + + + "cut arguments" : { + "title": "Cut arguments", + "type": "object", + "description": "No description", + "properties": { + + + "cut_front": { + "type": + "integer", + "description": "Type: `integer`. Move a sliding window from front (5\u0027) to tail, drop the bases in the window if its mean quality \u003c threshold, stop otherwise", + "help_text": "Type: `integer`. Move a sliding window from front (5\u0027) to tail, drop the bases in the window if its mean quality \u003c threshold, stop otherwise.\n" + + } + + + , + "cut_tail": { + "type": + "integer", + "description": "Type: `integer`. Move a sliding window from tail (3\u0027) to front, drop the bases in the window if its mean quality \u003c threshold, stop otherwise", + "help_text": "Type: `integer`. Move a sliding window from tail (3\u0027) to front, drop the bases in the window if its mean quality \u003c threshold, stop otherwise.\n" + + } + + + , + "cut_right": { + "type": + "integer", + "description": "Type: `integer`. Move a sliding window from front to tail, if meet one window with mean quality \u003c threshold, drop the bases in the window and the right part, and then stop", + "help_text": "Type: `integer`. Move a sliding window from front to tail, if meet one window with mean quality \u003c threshold, drop the bases in the window and the right part, and then stop.\n" + + } + + + , + "cut_window_size": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. The window size option shared by cut_front, cut_tail or cut_sliding", + "help_text": "Type: `integer`, example: `4`. The window size option shared by cut_front, cut_tail or cut_sliding. Range: 1~1000, default: 4.\n" + + } + + + , + "cut_mean_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. The mean quality requirement option shared by cut_front, cut_tail or cut_sliding", + "help_text": "Type: `integer`, example: `20`. The mean quality requirement option shared by cut_front, cut_tail or cut_sliding. Range: 1~36 default: 20 (Q20)\n" + + } + + + , + "cut_front_window_size": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. The window size option of cut_front, default to cut_window_size if not specified", + "help_text": "Type: `integer`, example: `4`. The window size option of cut_front, default to cut_window_size if not specified.\n" + + } + + + , + "cut_front_mean_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. The mean quality requirement option of cut_front, default to cut_mean_quality if not specified", + "help_text": "Type: `integer`, example: `20`. The mean quality requirement option of cut_front, default to cut_mean_quality if not specified.\n" + + } + + + , + "cut_tail_window_size": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. The window size option of cut_tail, default to cut_window_size if not specified", + "help_text": "Type: `integer`, example: `4`. The window size option of cut_tail, default to cut_window_size if not specified.\n" + + } + + + , + "cut_tail_mean_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. The mean quality requirement option of cut_tail, default to cut_mean_quality if not specified", + "help_text": "Type: `integer`, example: `20`. The mean quality requirement option of cut_tail, default to cut_mean_quality if not specified.\n" + + } + + + , + "cut_right_window_size": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. The window size option of cut_right, default to cut_window_size if not specified", + "help_text": "Type: `integer`, example: `4`. The window size option of cut_right, default to cut_window_size if not specified.\n" + + } + + + , + "cut_right_mean_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. The mean quality requirement option of cut_right, default to cut_mean_quality if not specified", + "help_text": "Type: `integer`, example: `20`. The mean quality requirement option of cut_right, default to cut_mean_quality if not specified.\n" + + } + + +} +}, + + + "quality filtering arguments" : { + "title": "Quality filtering arguments", + "type": "object", + "description": "No description", + "properties": { + + + "disable_quality_filtering": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Quality filtering is enabled by default", + "help_text": "Type: `boolean_true`, default: `false`. Quality filtering is enabled by default. If this option is specified, quality filtering is disabled.\n" + , + "default":false + } + + + , + "qualified_quality_phred": { + "type": + "integer", + "description": "Type: `integer`, example: `15`. The quality value that a base is qualified", + "help_text": "Type: `integer`, example: `15`. The quality value that a base is qualified. Default 15 means phred quality \u003e=Q15 is qualified.\n" + + } + + + , + "unqualified_percent_limit": { + "type": + "integer", + "description": "Type: `integer`, example: `40`. How many percents of bases are allowed to be unqualified (0~100)", + "help_text": "Type: `integer`, example: `40`. How many percents of bases are allowed to be unqualified (0~100). Default 40 means 40%.\n" + + } + + + , + "n_base_limit": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. If one read\u0027s number of N base is \u003en_base_limit, then this read/pair is discarded", + "help_text": "Type: `integer`, example: `5`. If one read\u0027s number of N base is \u003en_base_limit, then this read/pair is discarded. Default is 5.\n" + + } + + + , + "average_qual": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. If one read\u0027s average quality score \u003cavg_qual, then this read/pair is discarded", + "help_text": "Type: `integer`, example: `0`. If one read\u0027s average quality score \u003cavg_qual, then this read/pair is discarded. Default 0 means no requirement.\n" + + } + + +} +}, + + + "length filtering arguments" : { + "title": "Length filtering arguments", + "type": "object", + "description": "No description", + "properties": { + + + "disable_length_filtering": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Length filtering is enabled by default", + "help_text": "Type: `boolean_true`, default: `false`. Length filtering is enabled by default. If this option is specified, length filtering is disabled.\n" + , + "default":false + } + + + , + "length_required": { + "type": + "integer", + "description": "Type: `integer`, example: `15`. Reads shorter than length_required will be discarded, default is 15", + "help_text": "Type: `integer`, example: `15`. Reads shorter than length_required will be discarded, default is 15.\n" + + } + + + , + "length_limit": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Reads longer than length_limit will be discarded, default 0 means no limitation", + "help_text": "Type: `integer`, example: `0`. Reads longer than length_limit will be discarded, default 0 means no limitation.\n" + + } + + +} +}, + + + "low complexity filtering arguments" : { + "title": "Low complexity filtering arguments", + "type": "object", + "description": "No description", + "properties": { + + + "low_complexity_filter": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable low complexity filter", + "help_text": "Type: `boolean_true`, default: `false`. Enable low complexity filter. The complexity is defined as the percentage of base that is different from its next base (base[i] != base[i+1]).\n" + , + "default":false + } + + + , + "complexity_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `30`. The threshold for low complexity filter (0~100)", + "help_text": "Type: `integer`, example: `30`. The threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required.\n" + + } + + +} +}, + + + "index filtering arguments" : { + "title": "Index filtering arguments", + "type": "object", + "description": "No description", + "properties": { + + + "filter_by_index1": { + "type": + "string", + "description": "Type: `file`. Specify a file contains a list of barcodes of index1 to be filtered out, one barcode per line", + "help_text": "Type: `file`. Specify a file contains a list of barcodes of index1 to be filtered out, one barcode per line.\n" + + } + + + , + "filter_by_index2": { + "type": + "string", + "description": "Type: `file`. Specify a file contains a list of barcodes of index2 to be filtered out, one barcode per line", + "help_text": "Type: `file`. Specify a file contains a list of barcodes of index2 to be filtered out, one barcode per line.\n" + + } + + + , + "filter_by_index_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. The allowed difference of index barcode for index filtering, default 0 means completely identical", + "help_text": "Type: `integer`, example: `0`. The allowed difference of index barcode for index filtering, default 0 means completely identical.\n" + + } + + +} +}, + + + "overlapped region correction" : { + "title": "Overlapped region correction", + "type": "object", + "description": "No description", + "properties": { + + + "correction": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable base correction in overlapped regions (only for PE data), default is disabled", + "help_text": "Type: `boolean_true`, default: `false`. Enable base correction in overlapped regions (only for PE data), default is disabled.\n" + , + "default":false + } + + + , + "overlap_len_require": { + "type": + "integer", + "description": "Type: `integer`, example: `30`. The minimum length to detect overlapped region of PE reads", + "help_text": "Type: `integer`, example: `30`. The minimum length to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 30 by default.\n" + + } + + + , + "overlap_diff_limit": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. The maximum number of mismatched bases to detect overlapped region of PE reads", + "help_text": "Type: `integer`, example: `5`. The maximum number of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 5 by default.\n" + + } + + + , + "overlap_diff_percent_limit": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. The maximum percentage of mismatched bases to detect overlapped region of PE reads", + "help_text": "Type: `integer`, example: `20`. The maximum percentage of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. Default 20 means 20%.\n" + + } + + +} +}, + + + "umi arguments" : { + "title": "UMI arguments", + "type": "object", + "description": "No description", + "properties": { + + + "umi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable unique molecular identifier (UMI) preprocessing", + "help_text": "Type: `boolean_true`, default: `false`. Enable unique molecular identifier (UMI) preprocessing.\n" + , + "default":false + } + + + , + "umi_loc": { + "type": + "string", + "description": "Type: `string`, choices: ``index1`, `index2`, `read1`, `read2`, `per_index`, `per_read``. Specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none", + "help_text": "Type: `string`, choices: ``index1`, `index2`, `read1`, `read2`, `per_index`, `per_read``. Specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none.\n", + "enum": ["index1", "index2", "read1", "read2", "per_index", "per_read"] + + + } + + + , + "umi_len": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. If the UMI is in read1/read2, its length should be provided", + "help_text": "Type: `integer`, example: `0`. If the UMI is in read1/read2, its length should be provided.\n" + + } + + + , + "umi_prefix": { + "type": + "string", + "description": "Type: `string`. If specified, an underline will be used to connect prefix and UMI (i", + "help_text": "Type: `string`. If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default.\n" + + } + + + , + "umi_skip": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. If the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0", + "help_text": "Type: `integer`, example: `0`. If the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0.\n" + + } + + + , + "umi_delim": { + "type": + "string", + "description": "Type: `string`. If the UMI is in index1/index2, fastp can use a delimiter to separate UMI from the read sequence, default is none", + "help_text": "Type: `string`. If the UMI is in index1/index2, fastp can use a delimiter to separate UMI from the read sequence, default is none.\n" + + } + + +} +}, + + + "overrepresentation analysis arguments" : { + "title": "Overrepresentation analysis arguments", + "type": "object", + "description": "No description", + "properties": { + + + "overrepresentation_analysis": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable overrepresentation analysis", + "help_text": "Type: `boolean_true`, default: `false`. Enable overrepresentation analysis.\n" + , + "default":false + } + + + , + "overrepresentation_sampling": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower, default is 20", + "help_text": "Type: `integer`, example: `20`. One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower, default is 20.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/report output arguments" + }, + + { + "$ref": "#/definitions/adapter trimming" + }, + + { + "$ref": "#/definitions/base trimming" + }, + + { + "$ref": "#/definitions/merging mode" + }, + + { + "$ref": "#/definitions/additional input arguments" + }, + + { + "$ref": "#/definitions/additional output arguments" + }, + + { + "$ref": "#/definitions/logging arguments" + }, + + { + "$ref": "#/definitions/processing arguments" + }, + + { + "$ref": "#/definitions/deduplication arguments" + }, + + { + "$ref": "#/definitions/polyg tail trimming arguments" + }, + + { + "$ref": "#/definitions/polyx tail trimming arguments" + }, + + { + "$ref": "#/definitions/cut arguments" + }, + + { + "$ref": "#/definitions/quality filtering arguments" + }, + + { + "$ref": "#/definitions/length filtering arguments" + }, + + { + "$ref": "#/definitions/low complexity filtering arguments" + }, + + { + "$ref": "#/definitions/index filtering arguments" + }, + + { + "$ref": "#/definitions/overlapped region correction" + }, + + { + "$ref": "#/definitions/umi arguments" + }, + + { + "$ref": "#/definitions/overrepresentation analysis arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/.config.vsh.yaml new file mode 100644 index 0000000..86d9bc5 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/.config.vsh.yaml @@ -0,0 +1,381 @@ +name: "fastqc" +version: "v0.3.1" +authors: +- name: "Theodoro Gasperin Terra Camargo" + roles: + - "author" + - "maintainer" + info: + links: + email: "theodorogtc@gmail.com" + github: "tgaspe" + linkedin: "theodoro-gasperin-terra-camargo" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "FASTQ file(s) to be analyzed.\n" + info: null + example: + - "input.fq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Outputs" + description: "At least one of the output options (--html, --zip, --summary, --data)\ + \ must be used.\n" + arguments: + - type: "file" + name: "--html" + description: "Create the HTML report of the results. \n'*' wild card must be provided\ + \ in the output file name. \nWild card will be replaced by the input file basename.\n\ + e.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output\ + \ html file named sample_1.html\n" + info: null + example: + - "*.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--zip" + description: "Create the zip file(s) containing: html report, data, images, icons,\ + \ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\ + \ card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\ + \n --html \"*.zip\"\n would create an output zip file named sample_1.zip\n" + info: null + example: + - "*.zip" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--summary" + description: "Create the summary file(s).\n'*' wild card must be provided in the\ + \ output file name.\nWild card will be replaced by the input basename.\ne.g.\ + \ \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create\ + \ an output summary.txt file named sample_1_summary.txt\n" + info: null + example: + - "*_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--data" + description: "Create the data file(s).\n'*' wild card must be provided in the\ + \ output file name.\nWild card will be replaced by the input basename.\ne.g.\ + \ \n --input \"sample_1.fq\"\n --summary \"*_data.txt\"\n would create an\ + \ output data.txt file named sample_1_data.txt\n" + info: null + example: + - "*_data.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" +- name: "Options" + arguments: + - type: "boolean_true" + name: "--casava" + description: "Files come from raw casava output. Files in the same sample\ngroup\ + \ (differing only by the group number) will be analysed\nas a set rather than\ + \ individually. Sequences with the filter\nflag set in the header will be excluded\ + \ from the analysis.\nFiles must have the same names given to them by casava\n\ + (including being gzipped and ending with .gz) otherwise they\nwon't be grouped\ + \ together correctly.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--nano" + description: "Files come from nanopore sequences and are in fast5 format. In\n\ + this mode you can pass in directories to process and the program\nwill take\ + \ in all fast5 files within those directories and produce\na single output file\ + \ from the sequences found in all files.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--nofilter" + description: "If running with --casava then don't remove read flagged by\ncasava\ + \ as poor quality when performing the QC analysis.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--nogroup" + description: "Disable grouping of bases for reads >50bp. \nAll reports will show\ + \ data for every base in the read. \nWARNING: Using this option will cause fastqc\ + \ to crash \nand burn if you use it on really long reads, and your \nplots may\ + \ end up a ridiculous size. You have been warned!\n" + info: null + direction: "input" + - type: "integer" + name: "--min_length" + description: "Sets an artificial lower limit on the length of the \nsequence to\ + \ be shown in the report. As long as you \nset this to a value greater or equal\ + \ to your longest \nread length then this will be the sequence length used \n\ + to create your read groups. This can be useful for making\ndirectly comparable\ + \ statistics from datasets with somewhat \nvariable read lengths.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--format" + alternatives: + - "-f" + description: "Bypasses the normal sequence file format detection and \nforces\ + \ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\ + \ sam_mapped, and fastq.\n" + info: null + example: + - "bam" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--contaminants" + alternatives: + - "-c" + description: "Specifies a non-default file which contains the list \nof contaminants\ + \ to screen overrepresented sequences against. \nThe file must contain sets\ + \ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\ + \ a hash will be ignored.\n" + info: null + example: + - "contaminants.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--adapters" + alternatives: + - "-a" + description: "Specifies a non-default file which contains the list of \nadapter\ + \ sequences which will be explicitly searched against \nthe library. The file\ + \ must contain sets of named adapters \nin the form name[tab]sequence. Lines\ + \ prefixed with a hash will be ignored.\n" + info: null + example: + - "adapters.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--limits" + alternatives: + - "-l" + description: "Specifies a non-default file which contains \na set of criteria\ + \ which will be used to determine \nthe warn/error limits for the various modules.\ + \ \nThis file can also be used to selectively remove \nsome modules from the\ + \ output altogether. The format \nneeds to mirror the default limits.txt file\ + \ found in \nthe Configuration folder.\n" + info: null + example: + - "limits.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--kmers" + alternatives: + - "-k" + description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\ + \ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\ + \ specified.\n" + info: null + example: + - 7 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--quiet" + alternatives: + - "-q" + description: "Suppress all progress messages on stdout and only report errors.\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "FastQC - A high throughput sequence QC analysis tool." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "Quality control" +- "BAM" +- "SAM" +- "FASTQ" +license: "GPL-3.0, Apache-2.0" +links: + repository: "https://github.com/s-andrews/FastQC" + homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" + documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/" + issue_tracker: "https://github.com/s-andrews/FastQC/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "biocontainers/fastqc:v0.11.9_cv8" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/fastqc/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/fastqc" + executable: "target/nextflow/fastqc/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf new file mode 100644 index 0000000..d58b147 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf @@ -0,0 +1,4079 @@ +// fastqc v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Theodoro Gasperin Terra Camargo (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "fastqc", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Theodoro Gasperin Terra Camargo", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "theodorogtc@gmail.com", + "github" : "tgaspe", + "linkedin" : "theodoro-gasperin-terra-camargo" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "FASTQ file(s) to be analyzed.\n", + "example" : [ + "input.fq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "description" : "At least one of the output options (--html, --zip, --summary, --data) must be used.\n", + "arguments" : [ + { + "type" : "file", + "name" : "--html", + "description" : "Create the HTML report of the results. \n'*' wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n --input \\"sample_1.fq\\"\n --html \\"*.html\\"\n would create an output html file named sample_1.html\n", + "example" : [ + "*.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--zip", + "description" : "Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --html \\"*.zip\\"\n would create an output zip file named sample_1.zip\n", + "example" : [ + "*.zip" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--summary", + "description" : "Create the summary file(s).\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --summary \\"*_summary.txt\\"\n would create an output summary.txt file named sample_1_summary.txt\n", + "example" : [ + "*_summary.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--data", + "description" : "Create the data file(s).\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --summary \\"*_data.txt\\"\n would create an output data.txt file named sample_1_data.txt\n", + "example" : [ + "*_data.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--casava", + "description" : "Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon't be grouped together correctly.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nano", + "description" : "Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nofilter", + "description" : "If running with --casava then don't remove read flagged by\ncasava as poor quality when performing the QC analysis.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nogroup", + "description" : "Disable grouping of bases for reads >50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_length", + "description" : "Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--format", + "alternatives" : [ + "-f" + ], + "description" : "Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n", + "example" : [ + "bam" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--contaminants", + "alternatives" : [ + "-c" + ], + "description" : "Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n", + "example" : [ + "contaminants.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--adapters", + "alternatives" : [ + "-a" + ], + "description" : "Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n", + "example" : [ + "adapters.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--limits", + "alternatives" : [ + "-l" + ], + "description" : "Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n", + "example" : [ + "limits.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--kmers", + "alternatives" : [ + "-k" + ], + "description" : "Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n", + "example" : [ + 7 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "alternatives" : [ + "-q" + ], + "description" : "Suppress all progress messages on stdout and only report errors.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "FastQC - A high throughput sequence QC analysis tool.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Quality control", + "BAM", + "SAM", + "FASTQ" + ], + "license" : "GPL-3.0, Apache-2.0", + "links" : { + "repository" : "https://github.com/s-andrews/FastQC", + "homepage" : "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/", + "documentation" : "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/", + "issue_tracker" : "https://github.com/s-andrews/FastQC/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "biocontainers/fastqc:v0.11.9_cv8", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "echo \\"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/fastqc/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/fastqc", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_HTML+x} ]; then echo "${VIASH_PAR_HTML}" | sed "s#'#'\\"'\\"'#g;s#.*#par_html='&'#" ; else echo "# par_html="; fi ) +$( if [ ! -z ${VIASH_PAR_ZIP+x} ]; then echo "${VIASH_PAR_ZIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip='&'#" ; else echo "# par_zip="; fi ) +$( if [ ! -z ${VIASH_PAR_SUMMARY+x} ]; then echo "${VIASH_PAR_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_summary='&'#" ; else echo "# par_summary="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA+x} ]; then echo "${VIASH_PAR_DATA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data='&'#" ; else echo "# par_data="; fi ) +$( if [ ! -z ${VIASH_PAR_CASAVA+x} ]; then echo "${VIASH_PAR_CASAVA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_casava='&'#" ; else echo "# par_casava="; fi ) +$( if [ ! -z ${VIASH_PAR_NANO+x} ]; then echo "${VIASH_PAR_NANO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nano='&'#" ; else echo "# par_nano="; fi ) +$( if [ ! -z ${VIASH_PAR_NOFILTER+x} ]; then echo "${VIASH_PAR_NOFILTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nofilter='&'#" ; else echo "# par_nofilter="; fi ) +$( if [ ! -z ${VIASH_PAR_NOGROUP+x} ]; then echo "${VIASH_PAR_NOGROUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nogroup='&'#" ; else echo "# par_nogroup="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_LENGTH+x} ]; then echo "${VIASH_PAR_MIN_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_length='&'#" ; else echo "# par_min_length="; fi ) +$( if [ ! -z ${VIASH_PAR_FORMAT+x} ]; then echo "${VIASH_PAR_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_format='&'#" ; else echo "# par_format="; fi ) +$( if [ ! -z ${VIASH_PAR_CONTAMINANTS+x} ]; then echo "${VIASH_PAR_CONTAMINANTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_contaminants='&'#" ; else echo "# par_contaminants="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTERS+x} ]; then echo "${VIASH_PAR_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapters='&'#" ; else echo "# par_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_LIMITS+x} ]; then echo "${VIASH_PAR_LIMITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_limits='&'#" ; else echo "# par_limits="; fi ) +$( if [ ! -z ${VIASH_PAR_KMERS+x} ]; then echo "${VIASH_PAR_KMERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmers='&'#" ; else echo "# par_kmers="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +# exit on error +set -eo pipefail + +# Check if both outputs are empty, at least one must be passed. +if [[ -z "\\$par_html" ]] && [[ -z "\\$par_zip" ]] && [[ -z "\\$par_summary" ]] && [[ -z "\\$par_data" ]]; then + echo "Error: At least one of the output arguments (--html, --zip, --summary, and --data) must be passed." + exit 1 +fi + +# unset flags +unset_if_false=( + par_casava + par_nano + par_nofilter + par_extract + par_noextract + par_nogroup + par_quiet +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +tmpdir=\\$(mktemp -d "\\${meta_temp_dir}/\\${meta_name}-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +# Create input array +IFS=";" read -ra input <<< \\$par_input + +# Run fastqc +fastqc \\\\ + --extract \\\\ + \\${par_casava:+--casava} \\\\ + \\${par_nano:+--nano} \\\\ + \\${par_nofilter:+--nofilter} \\\\ + \\${par_nogroup:+--nogroup} \\\\ + \\${par_min_length:+--min_length "\\$par_min_length"} \\\\ + \\${par_format:+--format "\\$par_format"} \\\\ + \\${par_contaminants:+--contaminants "\\$par_contaminants"} \\\\ + \\${par_adapters:+--adapters "\\$par_adapters"} \\\\ + \\${par_limits:+--limits "\\$par_limits"} \\\\ + \\${par_kmers:+--kmers "\\$par_kmers"} \\\\ + \\${par_quiet:+--quiet} \\\\ + \\${meta_cpus:+--threads "\\$meta_cpus"} \\\\ + \\${meta_temp_dir:+--dir "\\$meta_temp_dir"} \\\\ + --outdir "\\${tmpdir}" \\\\ + "\\${input[@]}" + +# Move output files +for file in "\\${input[@]}"; do + # Removes everthing after the first dot of the basename + sample_name=\\$(basename "\\${file}" | sed 's/\\\\..*\\$//') + if [[ -n "\\$par_html" ]]; then + input_html="\\${tmpdir}/\\${sample_name}_fastqc.html" + html_file="\\${par_html//\\\\*/\\$sample_name}" + mv "\\$input_html" "\\$html_file" + fi + if [[ -n "\\$par_zip" ]]; then + input_zip="\\${tmpdir}/\\${sample_name}_fastqc.zip" + zip_file="\\${par_zip//\\\\*/\\$sample_name}" + mv "\\$input_zip" "\\$zip_file" + fi + if [[ -n "\\$par_summary" ]]; then + summary_file="\\${tmpdir}/\\${sample_name}_fastqc/summary.txt" + new_summary="\\${par_summary//\\\\*/\\$sample_name}" + mv "\\$summary_file" "\\$new_summary" + fi + if [[ -n "\\$par_data" ]]; then + data_file="\\${tmpdir}/\\${sample_name}_fastqc/fastqc_data.txt" + new_data="\\${par_data//\\\\*/\\$sample_name}" + mv "\\$data_file" "\\$new_data" + fi + # Remove the extracted directory + rm -r "\\${tmpdir}/\\${sample_name}_fastqc" +done +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/fastqc", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow.config new file mode 100644 index 0000000..8e0ce66 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'fastqc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'FastQC - A high throughput sequence QC analysis tool.' + author = 'Theodoro Gasperin Terra Camargo' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow_schema.json new file mode 100644 index 0000000..b85804c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow_schema.json @@ -0,0 +1,257 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "fastqc", +"description": "FastQC - A high throughput sequence QC analysis tool.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed", + "help_text": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "At least one of the output options (--html, --zip, --summary, --data) must be used.\n", + "properties": { + + + "html": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results", + "help_text": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results. \n\u0027*\u0027 wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output html file named sample_1.html\n" + , + "default":"$id.$key.html_*.html" + } + + + , + "zip": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc", + "help_text": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --html \"*.zip\"\n would create an output zip file named sample_1.zip\n" + , + "default":"$id.$key.zip_*.zip" + } + + + , + "summary": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s)", + "help_text": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create an output summary.txt file named sample_1_summary.txt\n" + , + "default":"$id.$key.summary_*.txt" + } + + + , + "data": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s)", + "help_text": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --summary \"*_data.txt\"\n would create an output data.txt file named sample_1_data.txt\n" + , + "default":"$id.$key.data_*.txt" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "casava": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Files come from raw casava output", + "help_text": "Type: `boolean_true`, default: `false`. Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon\u0027t be grouped together correctly.\n" + , + "default":false + } + + + , + "nano": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format", + "help_text": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n" + , + "default":false + } + + + , + "nofilter": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis", + "help_text": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis.\n" + , + "default":false + } + + + , + "nogroup": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp", + "help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n" + , + "default":false + } + + + , + "min_length": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report", + "help_text": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n" + + } + + + , + "format": { + "type": + "string", + "description": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format", + "help_text": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n" + + } + + + , + "contaminants": { + "type": + "string", + "description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against", + "help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n" + + } + + + , + "adapters": { + "type": + "string", + "description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library", + "help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n" + + } + + + , + "limits": { + "type": + "string", + "description": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules", + "help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n" + + } + + + , + "kmers": { + "type": + "integer", + "description": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module", + "help_text": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n" + + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors", + "help_text": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors.\n" + , + "default":false + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/.config.vsh.yaml new file mode 100644 index 0000000..fb8f0dc --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/.config.vsh.yaml @@ -0,0 +1,686 @@ +name: "featurecounts" +version: "v0.3.1" +authors: +- name: "Sai Nirmayi Yasa" + roles: + - "author" + - "maintainer" + info: + links: + email: "nirmayi@data-intuitive.com" + github: "sainirmayi" + linkedin: "sai-nirmayi-yasa" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Junior Bioinformatics Researcher" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--annotation" + alternatives: + - "-a" + description: "Name of an annotation file. GTF/GFF format by default. See '--format'\ + \ option for more format information.\n" + info: null + example: + - "annotation.gtf" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "A list of SAM or BAM format files separated by semi-colon (;). They\ + \ can be either name or location sorted. Location-sorted paired-end reads are\ + \ automatically sorted by read names.\n" + info: null + example: + - "input_file1.bam" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--counts" + alternatives: + - "-o" + description: "Name of output file including read counts in tab delimited format.\n" + info: null + example: + - "features.tsv" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--summary" + description: "Summary statistics of counting results in tab delimited format.\n" + info: null + example: + - "summary.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junctions" + description: "Count number of reads supporting each exon-exon junction. Junctions\ + \ were identified from those exon-spanning reads in the input (containing 'N'\ + \ in CIGAR string).\n" + info: null + example: + - "junctions.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Annotation" + arguments: + - type: "string" + name: "--format" + alternatives: + - "-F" + description: "Specify format of the provided annotation file. Acceptable formats\ + \ include 'GTF' (or compatible GFF format) and 'SAF'. 'GTF' by default. \n" + info: null + example: + - "GTF" + required: false + choices: + - "GTF" + - "GFF" + - "SAF" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--feature_type" + alternatives: + - "-t" + description: "Specify feature type(s) in a GTF annotation. If multiple types are\ + \ provided, they should be separated by ';' with no space in between. 'exon'\ + \ by default. Rows in the annotation with a matched feature will be extracted\ + \ and used for read mapping.\n" + info: null + example: + - "exon" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--attribute_type" + alternatives: + - "-g" + description: "Specify attribute type in GTF annotation. 'gene_id' by default.\ + \ Meta-features used for read counting will be extracted from annotation using\ + \ the provided value.\n" + info: null + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_attributes" + description: "Extract extra attribute types from the provided GTF annotation and\ + \ include them in the counting output. These attribute types will not be used\ + \ to group features. If more than one attribute type is provided they should\ + \ be separated by semicolon (;).\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--chrom_alias" + alternatives: + - "-A" + description: "Provide a chromosome name alias file to match chr names in annotation\ + \ with those in the reads. This should be a two-column comma-delimited text\ + \ file. Its first column should include chr names in the annotation and its\ + \ second column should include chr names in the reads. Chr names are case sensitive.\ + \ No column header should be included in the file.\n" + info: null + example: + - "chrom_alias.csv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Level of summarization" + arguments: + - type: "boolean_true" + name: "--feature_level" + alternatives: + - "-f" + description: "Perform read counting at feature level (eg. counting reads for exons\ + \ rather than genes).\n" + info: null + direction: "input" +- name: "Overlap between reads and features" + arguments: + - type: "boolean_true" + name: "--overlapping" + alternatives: + - "-O" + description: "Assign reads to all their overlapping meta-features (or features\ + \ if '--feature_level' is specified).\n" + info: null + direction: "input" + - type: "integer" + name: "--min_overlap" + description: "Minimum number of overlapping bases in a read that is required for\ + \ read assignment. 1 by default. Number of overlapping bases is counted from\ + \ both reads if paired end. If a negative value is provided, then a gap of up\ + \ to specified size will be allowed between read and the feature that the read\ + \ is assigned to.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--frac_overlap" + description: "Minimum fraction of overlapping bases in a read that is required\ + \ for read assignment. Value should be within range [0,1]. 0 by default. Number\ + \ of overlapping bases is counted from both reads if paired end. Both this option\ + \ and '--min_overlap' option need to be satisfied for read assignment.\n" + info: null + example: + - 0.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--frac_overlap_feature" + description: "Minimum fraction of overlapping bases in a feature that is required\ + \ for read assignment. Value should be within range [0,1]. 0 by default.\n" + info: null + example: + - 0.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--largest_overlap" + description: "Assign reads to a meta-feature/feature that has the largest number\ + \ of overlapping bases.\n" + info: null + direction: "input" + - type: "integer" + name: "--non_overlap" + description: "Maximum number of non-overlapping bases in a read (or a read pair)\ + \ that is allowed when being assigned to a feature. No limit is set by default.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--non_overlap_feature" + description: "Maximum number of non-overlapping bases in a feature that is allowed\ + \ in read assignment. No limit is set by default.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_extension5" + description: "Reads are extended upstream by bases from their 5' end.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_extension3" + description: "Reads are extended upstream by bases from their 3' end.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read2pos" + description: "Reduce reads to their 5' most base or 3' most base. Read counting\ + \ is then performed based on the single base the read is reduced to.\n" + info: null + required: false + choices: + - 3 + - 5 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Multi-mapping reads" + arguments: + - type: "boolean_true" + name: "--multi_mapping" + alternatives: + - "-M" + description: "Multi-mapping reads will also be counted. For a multi-mapping read,\ + \ all its reported alignments will be counted. The 'NH' tag in BAM/SAM input\ + \ is used to detect multi-mapping reads.\n" + info: null + direction: "input" +- name: "Fractional counting" + arguments: + - type: "boolean_true" + name: "--fraction" + description: "Assign fractional counts to features. This option must be used together\ + \ with '--multi_mapping' or '--overlapping' or both. When '--multi_mapping'\ + \ is specified, each reported alignment from a multi-mapping read (identified\ + \ via 'NH' tag) will carry a fractional count of 1/x, instead of 1 (one), where\ + \ x is the total number of alignments reported for the same read. When '--overlapping'\ + \ is specified, each overlapping feature will receive a fractional count of\ + \ 1/y, where y is the total number of features overlapping with the read. When\ + \ both '--multi_mapping' and '--overlapping' are specified, each alignment will\ + \ carry a fractional count of 1/(x*y).\n" + info: null + direction: "input" +- name: "Read filtering" + arguments: + - type: "integer" + name: "--min_map_quality" + alternatives: + - "-Q" + description: "The minimum mapping quality score a read must satisfy in order to\ + \ be counted. For paired-end reads, at least one end should satisfy this criteria.\ + \ 0 by default.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--split_only" + description: "Count split alignments only (ie. alignments with CIGAR string containing\ + \ 'N'). An example of split alignments is exon-spanning reads in RNA-seq data.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--non_split_only" + description: "If specified, only non-split alignments (CIGAR strings do not contain\ + \ letter 'N') will be counted. All the other alignments will be ignored.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--primary" + description: "Count primary alignments only. Primary alignments are identified\ + \ using bit 0x100 in SAM/BAM FLAG field.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--ignore_dup" + description: "Ignore duplicate reads in read counting. Duplicate reads are identified\ + \ using bit Ox400 in BAM/SAM FLAG field. The whole read pair is ignored if one\ + \ of the reads is a duplicate read for paired end data.\n" + info: null + direction: "input" +- name: "Strandedness" + arguments: + - type: "integer" + name: "--strand" + alternatives: + - "-s" + description: "Perform strand-specific read counting. A single integer value (applied\ + \ to all input files) should be provided. Possible values include: 0 (unstranded),\ + \ 1 (stranded) and 2 (reversely stranded). Default value is 0 (ie. unstranded\ + \ read counting carried out for all input files).\n" + info: null + example: + - 0 + required: false + choices: + - 0 + - 1 + - 2 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Exon-exon junctions" + arguments: + - type: "file" + name: "--ref_fasta" + alternatives: + - "-G" + description: "Provide the name of a FASTA-format file that contains the reference\ + \ sequences used in read mapping that produced the provided SAM/BAM files.\n" + info: null + example: + - "reference.fasta" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Parameters specific to paired end reads" + arguments: + - type: "boolean_true" + name: "--paired" + alternatives: + - "-p" + description: "Specify that input data contain paired-end reads. To perform fragment\ + \ counting (ie. counting read pairs), the '--countReadPairs' parameter should\ + \ also be specified in addition to this parameter.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--count_read_pairs" + description: "Count read pairs (fragments) instead of reads. This option is only\ + \ applicable for paired-end reads.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--both_aligned" + alternatives: + - "-B" + description: "Count read pairs (fragments) instead of reads. This option is only\ + \ applicable for paired-end reads.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--check_pe_dist" + alternatives: + - "-P" + description: "Check validity of paired-end distance when counting read pairs.\ + \ Use '--min_length' and '--max_length' to set thresholds.\n" + info: null + direction: "input" + - type: "integer" + name: "--min_length" + alternatives: + - "-d" + description: "Minimum fragment/template length, 50 by default.\n" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_length" + alternatives: + - "-D" + description: "Maximum fragment/template length, 600 by default.\n" + info: null + example: + - 600 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--same_strand" + alternatives: + - "-C" + description: "Do not count read pairs that have their two ends mapping to different\ + \ chromosomes or mapping to same chromosome but on different strands.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--donotsort" + description: "Do not sort reads in BAM/SAM input. Note that reads from the same\ + \ pair are required to be located next to each other in the input.\n" + info: null + direction: "input" +- name: "Read groups" + arguments: + - type: "boolean_true" + name: "--by_read_group" + description: "Assign reads by read group. \"RG\" tag is required to be present\ + \ in the input BAM/SAM files.\n" + info: null + direction: "input" +- name: "Long reads" + arguments: + - type: "boolean_true" + name: "--long_reads" + description: "Count long reads such as Nanopore and PacBio reads. Long read counting\ + \ can only run in one thread and only reads (not read-pairs) can be counted.\ + \ There is no limitation on the number of 'M' operations allowed in a CIGAR\ + \ string in long read counting.\n" + info: null + direction: "input" +- name: "Assignment results for each read" + arguments: + - type: "file" + name: "--detailed_results" + description: "Directory to save the detailed assignment results. Use `--detailed_results_format`\ + \ to determine the format of the detailed results.\n" + info: null + example: + - "detailed_results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--detailed_results_format" + alternatives: + - "-R" + description: "Output detailed assignment results for each read or read-pair. Results\ + \ are saved to a file that is in one of the following formats: CORE, SAM and\ + \ BAM. See documentaiton for more info about these formats.\n" + info: null + required: false + choices: + - "CORE" + - "SAM" + - "BAM" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Miscellaneous" + arguments: + - type: "integer" + name: "--max_M_op" + description: "Maximum number of 'M' operations allowed in a CIGAR string. 10 by\ + \ default. Both 'X' and '=' are treated as 'M' and adjacent 'M' operations are\ + \ merged in the CIGAR string.\n" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--verbose" + description: "Output verbose information for debugging, such as un-matched chromosome/contig\ + \ names.\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "featureCounts is a read summarization program for counting reads generated\ + \ from either RNA or genomic DNA sequencing experiments by implementing highly efficient\ + \ chromosome hashing and feature blocking techniques. It works with either single\ + \ or paired-end reads and provides a wide range of options appropriate for different\ + \ sequencing applications.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "Read counting" +- "Genomic features" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/btt656" +links: + repository: "https://github.com/ShiLab-Bioinformatics/subread" + homepage: "https://subread.sourceforge.net/" + documentation: "https://subread.sourceforge.net/SubreadUsersGuide.pdf" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/subread:2.0.6--he4a0461_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "featureCounts -v 2>&1 | sed 's/featureCounts v\\([0-9.]*\\)/featureCounts:\ + \ \\1/' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/featurecounts/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/featurecounts" + executable: "target/nextflow/featurecounts/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/main.nf new file mode 100644 index 0000000..2dba23a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/main.nf @@ -0,0 +1,4490 @@ +// featurecounts v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Sai Nirmayi Yasa (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "featurecounts", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Sai Nirmayi Yasa", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "nirmayi@data-intuitive.com", + "github" : "sainirmayi", + "linkedin" : "sai-nirmayi-yasa" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Junior Bioinformatics Researcher" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--annotation", + "alternatives" : [ + "-a" + ], + "description" : "Name of an annotation file. GTF/GFF format by default. See '--format' option for more format information.\n", + "example" : [ + "annotation.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "A list of SAM or BAM format files separated by semi-colon (;). They can be either name or location sorted. Location-sorted paired-end reads are automatically sorted by read names.\n", + "example" : [ + "input_file1.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--counts", + "alternatives" : [ + "-o" + ], + "description" : "Name of output file including read counts in tab delimited format.\n", + "example" : [ + "features.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--summary", + "description" : "Summary statistics of counting results in tab delimited format.\n", + "example" : [ + "summary.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junctions", + "description" : "Count number of reads supporting each exon-exon junction. Junctions were identified from those exon-spanning reads in the input (containing 'N' in CIGAR string).\n", + "example" : [ + "junctions.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Annotation", + "arguments" : [ + { + "type" : "string", + "name" : "--format", + "alternatives" : [ + "-F" + ], + "description" : "Specify format of the provided annotation file. Acceptable formats include 'GTF' (or compatible GFF format) and 'SAF'. 'GTF' by default. \n", + "example" : [ + "GTF" + ], + "required" : false, + "choices" : [ + "GTF", + "GFF", + "SAF" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--feature_type", + "alternatives" : [ + "-t" + ], + "description" : "Specify feature type(s) in a GTF annotation. If multiple types are provided, they should be separated by ';' with no space in between. 'exon' by default. Rows in the annotation with a matched feature will be extracted and used for read mapping.\n", + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--attribute_type", + "alternatives" : [ + "-g" + ], + "description" : "Specify attribute type in GTF annotation. 'gene_id' by default. Meta-features used for read counting will be extracted from annotation using the provided value.\n", + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_attributes", + "description" : "Extract extra attribute types from the provided GTF annotation and include them in the counting output. These attribute types will not be used to group features. If more than one attribute type is provided they should be separated by semicolon (;).\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chrom_alias", + "alternatives" : [ + "-A" + ], + "description" : "Provide a chromosome name alias file to match chr names in annotation with those in the reads. This should be a two-column comma-delimited text file. Its first column should include chr names in the annotation and its second column should include chr names in the reads. Chr names are case sensitive. No column header should be included in the file.\n", + "example" : [ + "chrom_alias.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Level of summarization", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--feature_level", + "alternatives" : [ + "-f" + ], + "description" : "Perform read counting at feature level (eg. counting reads for exons rather than genes).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Overlap between reads and features", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--overlapping", + "alternatives" : [ + "-O" + ], + "description" : "Assign reads to all their overlapping meta-features (or features if '--feature_level' is specified).\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_overlap", + "description" : "Minimum number of overlapping bases in a read that is required for read assignment. 1 by default. Number of overlapping bases is counted from both reads if paired end. If a negative value is provided, then a gap of up to specified size will be allowed between read and the feature that the read is assigned to.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--frac_overlap", + "description" : "Minimum fraction of overlapping bases in a read that is required for read assignment. Value should be within range [0,1]. 0 by default. Number of overlapping bases is counted from both reads if paired end. Both this option and '--min_overlap' option need to be satisfied for read assignment.\n", + "example" : [ + 0.0 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--frac_overlap_feature", + "description" : "Minimum fraction of overlapping bases in a feature that is required for read assignment. Value should be within range [0,1]. 0 by default.\n", + "example" : [ + 0.0 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--largest_overlap", + "description" : "Assign reads to a meta-feature/feature that has the largest number of overlapping bases.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--non_overlap", + "description" : "Maximum number of non-overlapping bases in a read (or a read pair) that is allowed when being assigned to a feature. No limit is set by default.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--non_overlap_feature", + "description" : "Maximum number of non-overlapping bases in a feature that is allowed in read assignment. No limit is set by default.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_extension5", + "description" : "Reads are extended upstream by bases from their 5' end.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_extension3", + "description" : "Reads are extended upstream by bases from their 3' end.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read2pos", + "description" : "Reduce reads to their 5' most base or 3' most base. Read counting is then performed based on the single base the read is reduced to.\n", + "required" : false, + "choices" : [ + 3, + 5 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Multi-mapping reads", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--multi_mapping", + "alternatives" : [ + "-M" + ], + "description" : "Multi-mapping reads will also be counted. For a multi-mapping read, all its reported alignments will be counted. The 'NH' tag in BAM/SAM input is used to detect multi-mapping reads.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Fractional counting", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--fraction", + "description" : "Assign fractional counts to features. This option must be used together with '--multi_mapping' or '--overlapping' or both. When '--multi_mapping' is specified, each reported alignment from a multi-mapping read (identified via 'NH' tag) will carry a fractional count of 1/x, instead of 1 (one), where x is the total number of alignments reported for the same read. When '--overlapping' is specified, each overlapping feature will receive a fractional count of 1/y, where y is the total number of features overlapping with the read. When both '--multi_mapping' and '--overlapping' are specified, each alignment will carry a fractional count of 1/(x*y).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Read filtering", + "arguments" : [ + { + "type" : "integer", + "name" : "--min_map_quality", + "alternatives" : [ + "-Q" + ], + "description" : "The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--split_only", + "description" : "Count split alignments only (ie. alignments with CIGAR string containing 'N'). An example of split alignments is exon-spanning reads in RNA-seq data.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--non_split_only", + "description" : "If specified, only non-split alignments (CIGAR strings do not contain letter 'N') will be counted. All the other alignments will be ignored.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--primary", + "description" : "Count primary alignments only. Primary alignments are identified using bit 0x100 in SAM/BAM FLAG field.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--ignore_dup", + "description" : "Ignore duplicate reads in read counting. Duplicate reads are identified using bit Ox400 in BAM/SAM FLAG field. The whole read pair is ignored if one of the reads is a duplicate read for paired end data.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Strandedness", + "arguments" : [ + { + "type" : "integer", + "name" : "--strand", + "alternatives" : [ + "-s" + ], + "description" : "Perform strand-specific read counting. A single integer value (applied to all input files) should be provided. Possible values include: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). Default value is 0 (ie. unstranded read counting carried out for all input files).\n", + "example" : [ + 0 + ], + "required" : false, + "choices" : [ + 0, + 1, + 2 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Exon-exon junctions", + "arguments" : [ + { + "type" : "file", + "name" : "--ref_fasta", + "alternatives" : [ + "-G" + ], + "description" : "Provide the name of a FASTA-format file that contains the reference sequences used in read mapping that produced the provided SAM/BAM files.\n", + "example" : [ + "reference.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Parameters specific to paired end reads", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--paired", + "alternatives" : [ + "-p" + ], + "description" : "Specify that input data contain paired-end reads. To perform fragment counting (ie. counting read pairs), the '--countReadPairs' parameter should also be specified in addition to this parameter.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--count_read_pairs", + "description" : "Count read pairs (fragments) instead of reads. This option is only applicable for paired-end reads.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--both_aligned", + "alternatives" : [ + "-B" + ], + "description" : "Count read pairs (fragments) instead of reads. This option is only applicable for paired-end reads.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--check_pe_dist", + "alternatives" : [ + "-P" + ], + "description" : "Check validity of paired-end distance when counting read pairs. Use '--min_length' and '--max_length' to set thresholds.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_length", + "alternatives" : [ + "-d" + ], + "description" : "Minimum fragment/template length, 50 by default.\n", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_length", + "alternatives" : [ + "-D" + ], + "description" : "Maximum fragment/template length, 600 by default.\n", + "example" : [ + 600 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--same_strand", + "alternatives" : [ + "-C" + ], + "description" : "Do not count read pairs that have their two ends mapping to different chromosomes or mapping to same chromosome but on different strands.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--donotsort", + "description" : "Do not sort reads in BAM/SAM input. Note that reads from the same pair are required to be located next to each other in the input.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Read groups", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--by_read_group", + "description" : "Assign reads by read group. \\"RG\\" tag is required to be present in the input BAM/SAM files.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Long reads", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--long_reads", + "description" : "Count long reads such as Nanopore and PacBio reads. Long read counting can only run in one thread and only reads (not read-pairs) can be counted. There is no limitation on the number of 'M' operations allowed in a CIGAR string in long read counting.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Assignment results for each read", + "arguments" : [ + { + "type" : "file", + "name" : "--detailed_results", + "description" : "Directory to save the detailed assignment results. Use `--detailed_results_format` to determine the format of the detailed results.\n", + "example" : [ + "detailed_results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--detailed_results_format", + "alternatives" : [ + "-R" + ], + "description" : "Output detailed assignment results for each read or read-pair. Results are saved to a file that is in one of the following formats: CORE, SAM and BAM. See documentaiton for more info about these formats.\n", + "required" : false, + "choices" : [ + "CORE", + "SAM", + "BAM" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Miscellaneous", + "arguments" : [ + { + "type" : "integer", + "name" : "--max_M_op", + "description" : "Maximum number of 'M' operations allowed in a CIGAR string. 10 by default. Both 'X' and '=' are treated as 'M' and adjacent 'M' operations are merged in the CIGAR string.\n", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--verbose", + "description" : "Output verbose information for debugging, such as un-matched chromosome/contig names.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "featureCounts is a read summarization program for counting reads generated from either RNA or genomic DNA sequencing experiments by implementing highly efficient chromosome hashing and feature blocking techniques. It works with either single or paired-end reads and provides a wide range of options appropriate for different sequencing applications.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Read counting", + "Genomic features" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btt656" + ] + }, + "links" : { + "repository" : "https://github.com/ShiLab-Bioinformatics/subread", + "homepage" : "https://subread.sourceforge.net/", + "documentation" : "https://subread.sourceforge.net/SubreadUsersGuide.pdf" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/subread:2.0.6--he4a0461_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "featureCounts -v 2>&1 | sed 's/featureCounts v\\\\([0-9.]*\\\\)/featureCounts: \\\\1/' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/featurecounts/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/featurecounts", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -e + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ANNOTATION+x} ]; then echo "${VIASH_PAR_ANNOTATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_annotation='&'#" ; else echo "# par_annotation="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS+x} ]; then echo "${VIASH_PAR_COUNTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts='&'#" ; else echo "# par_counts="; fi ) +$( if [ ! -z ${VIASH_PAR_SUMMARY+x} ]; then echo "${VIASH_PAR_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_summary='&'#" ; else echo "# par_summary="; fi ) +$( if [ ! -z ${VIASH_PAR_JUNCTIONS+x} ]; then echo "${VIASH_PAR_JUNCTIONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_junctions='&'#" ; else echo "# par_junctions="; fi ) +$( if [ ! -z ${VIASH_PAR_FORMAT+x} ]; then echo "${VIASH_PAR_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_format='&'#" ; else echo "# par_format="; fi ) +$( if [ ! -z ${VIASH_PAR_FEATURE_TYPE+x} ]; then echo "${VIASH_PAR_FEATURE_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_feature_type='&'#" ; else echo "# par_feature_type="; fi ) +$( if [ ! -z ${VIASH_PAR_ATTRIBUTE_TYPE+x} ]; then echo "${VIASH_PAR_ATTRIBUTE_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_attribute_type='&'#" ; else echo "# par_attribute_type="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_ATTRIBUTES+x} ]; then echo "${VIASH_PAR_EXTRA_ATTRIBUTES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_attributes='&'#" ; else echo "# par_extra_attributes="; fi ) +$( if [ ! -z ${VIASH_PAR_CHROM_ALIAS+x} ]; then echo "${VIASH_PAR_CHROM_ALIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chrom_alias='&'#" ; else echo "# par_chrom_alias="; fi ) +$( if [ ! -z ${VIASH_PAR_FEATURE_LEVEL+x} ]; then echo "${VIASH_PAR_FEATURE_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_feature_level='&'#" ; else echo "# par_feature_level="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERLAPPING+x} ]; then echo "${VIASH_PAR_OVERLAPPING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overlapping='&'#" ; else echo "# par_overlapping="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_OVERLAP+x} ]; then echo "${VIASH_PAR_MIN_OVERLAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_overlap='&'#" ; else echo "# par_min_overlap="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAC_OVERLAP+x} ]; then echo "${VIASH_PAR_FRAC_OVERLAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_frac_overlap='&'#" ; else echo "# par_frac_overlap="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAC_OVERLAP_FEATURE+x} ]; then echo "${VIASH_PAR_FRAC_OVERLAP_FEATURE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_frac_overlap_feature='&'#" ; else echo "# par_frac_overlap_feature="; fi ) +$( if [ ! -z ${VIASH_PAR_LARGEST_OVERLAP+x} ]; then echo "${VIASH_PAR_LARGEST_OVERLAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_largest_overlap='&'#" ; else echo "# par_largest_overlap="; fi ) +$( if [ ! -z ${VIASH_PAR_NON_OVERLAP+x} ]; then echo "${VIASH_PAR_NON_OVERLAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_non_overlap='&'#" ; else echo "# par_non_overlap="; fi ) +$( if [ ! -z ${VIASH_PAR_NON_OVERLAP_FEATURE+x} ]; then echo "${VIASH_PAR_NON_OVERLAP_FEATURE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_non_overlap_feature='&'#" ; else echo "# par_non_overlap_feature="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_EXTENSION5+x} ]; then echo "${VIASH_PAR_READ_EXTENSION5}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_extension5='&'#" ; else echo "# par_read_extension5="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_EXTENSION3+x} ]; then echo "${VIASH_PAR_READ_EXTENSION3}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_extension3='&'#" ; else echo "# par_read_extension3="; fi ) +$( if [ ! -z ${VIASH_PAR_READ2POS+x} ]; then echo "${VIASH_PAR_READ2POS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read2pos='&'#" ; else echo "# par_read2pos="; fi ) +$( if [ ! -z ${VIASH_PAR_MULTI_MAPPING+x} ]; then echo "${VIASH_PAR_MULTI_MAPPING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_multi_mapping='&'#" ; else echo "# par_multi_mapping="; fi ) +$( if [ ! -z ${VIASH_PAR_FRACTION+x} ]; then echo "${VIASH_PAR_FRACTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fraction='&'#" ; else echo "# par_fraction="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_MAP_QUALITY+x} ]; then echo "${VIASH_PAR_MIN_MAP_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_map_quality='&'#" ; else echo "# par_min_map_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT_ONLY+x} ]; then echo "${VIASH_PAR_SPLIT_ONLY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split_only='&'#" ; else echo "# par_split_only="; fi ) +$( if [ ! -z ${VIASH_PAR_NON_SPLIT_ONLY+x} ]; then echo "${VIASH_PAR_NON_SPLIT_ONLY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_non_split_only='&'#" ; else echo "# par_non_split_only="; fi ) +$( if [ ! -z ${VIASH_PAR_PRIMARY+x} ]; then echo "${VIASH_PAR_PRIMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_primary='&'#" ; else echo "# par_primary="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_DUP+x} ]; then echo "${VIASH_PAR_IGNORE_DUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_dup='&'#" ; else echo "# par_ignore_dup="; fi ) +$( if [ ! -z ${VIASH_PAR_STRAND+x} ]; then echo "${VIASH_PAR_STRAND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strand='&'#" ; else echo "# par_strand="; fi ) +$( if [ ! -z ${VIASH_PAR_REF_FASTA+x} ]; then echo "${VIASH_PAR_REF_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref_fasta='&'#" ; else echo "# par_ref_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNT_READ_PAIRS+x} ]; then echo "${VIASH_PAR_COUNT_READ_PAIRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_count_read_pairs='&'#" ; else echo "# par_count_read_pairs="; fi ) +$( if [ ! -z ${VIASH_PAR_BOTH_ALIGNED+x} ]; then echo "${VIASH_PAR_BOTH_ALIGNED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_both_aligned='&'#" ; else echo "# par_both_aligned="; fi ) +$( if [ ! -z ${VIASH_PAR_CHECK_PE_DIST+x} ]; then echo "${VIASH_PAR_CHECK_PE_DIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_check_pe_dist='&'#" ; else echo "# par_check_pe_dist="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_LENGTH+x} ]; then echo "${VIASH_PAR_MIN_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_length='&'#" ; else echo "# par_min_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_LENGTH+x} ]; then echo "${VIASH_PAR_MAX_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_length='&'#" ; else echo "# par_max_length="; fi ) +$( if [ ! -z ${VIASH_PAR_SAME_STRAND+x} ]; then echo "${VIASH_PAR_SAME_STRAND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_same_strand='&'#" ; else echo "# par_same_strand="; fi ) +$( if [ ! -z ${VIASH_PAR_DONOTSORT+x} ]; then echo "${VIASH_PAR_DONOTSORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_donotsort='&'#" ; else echo "# par_donotsort="; fi ) +$( if [ ! -z ${VIASH_PAR_BY_READ_GROUP+x} ]; then echo "${VIASH_PAR_BY_READ_GROUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_by_read_group='&'#" ; else echo "# par_by_read_group="; fi ) +$( if [ ! -z ${VIASH_PAR_LONG_READS+x} ]; then echo "${VIASH_PAR_LONG_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_long_reads='&'#" ; else echo "# par_long_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_DETAILED_RESULTS+x} ]; then echo "${VIASH_PAR_DETAILED_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_detailed_results='&'#" ; else echo "# par_detailed_results="; fi ) +$( if [ ! -z ${VIASH_PAR_DETAILED_RESULTS_FORMAT+x} ]; then echo "${VIASH_PAR_DETAILED_RESULTS_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_detailed_results_format='&'#" ; else echo "# par_detailed_results_format="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_M_OP+x} ]; then echo "${VIASH_PAR_MAX_M_OP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_M_op='&'#" ; else echo "# par_max_M_op="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +# create temporary directory +tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" "\\${meta_name}_XXXXXX") +mkdir -p "\\$tmp_dir/temp" + +# create detailed_results directory if variable is set and directory does not exist +if [[ ! -z "\\$par_detailed_results" ]] && [[ ! -d "\\$par_detailed_results" ]]; then + mkdir -p "\\$par_detailed_results" +fi + +# replace comma with semicolon +par_feature_type=\\$(echo \\$par_feature_type | tr ',' ';') +par_extra_attributes=\\$(echo \\$par_extra_attributes | tr ',' ';') + +# unset flag variables +unset_if_false=( + par_feature_level + par_overlapping + par_largest_overlap + par_multi_mapping + par_fraction + par_split_only + par_non_split_only + par_primary + par_ignore_dup + par_paired + par_count_read_pairs + par_both_aligned + par_check_pe_dist + par_same_strand + par_donotsort + par_by_read_group + par_long_reads + par_verbose +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +IFS=";" read -ra input <<< \\$par_input + +featureCounts \\\\ + \\${par_format:+-F "\\${par_format}"} \\\\ + \\${par_feature_type:+-t "\\${par_feature_type}"} \\\\ + \\${par_attribute_type:+-g "\\${par_attribute_type}"} \\\\ + \\${par_extra_attributes:+--extraAttributes "\\${extra_attributes}"} \\\\ + \\${par_chrom_alias:+-A "\\${par_chrom_alias}"} \\\\ + \\${par_feature_level:+-f} \\\\ + \\${par_overlapping:+-O} \\\\ + \\${par_min_overlap:+--minOverlap "\\${par_min_overlap}"} \\\\ + \\${par_frac_overlap:+--fracOverlap "\\${par_frac_overlap}"} \\\\ + \\${par_frac_overlap_feature:+--fracOverlapFeature "\\${par_frac_overlap_feature}"} \\\\ + \\${par_largest_overlap:+--largestOverlap} \\\\ + \\${par_non_overlap:+--nonOverlap "\\${par_non_overlap}"} \\\\ + \\${par_non_overlap_feature:+--nonOverlapFeature "\\${par_non_overlap_feature}"} \\\\ + \\${par_read_extension5:+--readExtension5 "\\${par_read_extension5}"} \\\\ + \\${par_read_extension3:+--readExtension3 "\\${par_read_extension3}"} \\\\ + \\${par_read2pos:+--read2pos "\\${par_read2pos}"} \\\\ + \\${par_multi_mapping:+-M} \\\\ + \\${par_fraction:+--fraction} \\\\ + \\${par_min_map_quality:+-Q "\\${par_min_map_quality}"} \\\\ + \\${par_split_only:+--splitOnly} \\\\ + \\${par_non_split_only:+--nonSplitOnly} \\\\ + \\${par_primary:+--primary} \\\\ + \\${par_ignore_dup:+--ignoreDup} \\\\ + \\${par_strand:+-s "\\${par_strand}"} \\\\ + \\${par_junctions:+-J} \\\\ + \\${par_ref_fasta:+-G "\\${par_ref_fasta}"} \\\\ + \\${par_paired:+-p} \\\\ + \\${par_count_read_pairs:+--countReadPairs} \\\\ + \\${par_both_aligned:+-B} \\\\ + \\${par_check_pe_dist:+-P} \\\\ + \\${par_min_length:+-d "\\${par_min_length}"} \\\\ + \\${par_max_length:+-D "\\${par_max_length}"} \\\\ + \\${par_same_strand:+-C} \\\\ + \\${par_donotsort:+--donotsort} \\\\ + \\${par_by_read_group:+--byReadGroup} \\\\ + \\${par_long_reads:+-L} \\\\ + \\${par_detailed_results:+--Rpath "\\${par_detailed_results}"} \\\\ + \\${par_detailed_results_format:+-R "\\${par_detailed_results_format}"} \\\\ + \\${par_max_M_op:+--maxMOp "\\${par_max_M_op}"} \\\\ + \\${par_verbose:+--verbose} \\\\ + \\${meta_cpus:+-T "\\${meta_cpus}"} \\\\ + --tmpDir "\\$tmp_dir/temp" \\\\ + -a "\\$par_annotation" \\\\ + -o "\\$tmp_dir/output.txt" \\\\ + "\\${input[*]}" + +[[ ! -z "\\$par_counts" ]] && mv "\\$tmp_dir/output.txt" "\\$par_counts" +[[ ! -z "\\$par_summary" ]] && mv "\\$tmp_dir/output.txt.summary" "\\$par_summary" +if [[ ! -z "\\$par_junctions" ]] && [[ -e "\\$tmp_dir/output.txt.jcounts" ]]; then + mv "\\$tmp_dir/output.txt.jcounts" "\\$par_junctions" +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/featurecounts", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow.config new file mode 100644 index 0000000..e1212a9 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'featurecounts' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'featureCounts is a read summarization program for counting reads generated from either RNA or genomic DNA sequencing experiments by implementing highly efficient chromosome hashing and feature blocking techniques. It works with either single or paired-end reads and provides a wide range of options appropriate for different sequencing applications.\n' + author = 'Sai Nirmayi Yasa' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow_schema.json new file mode 100644 index 0000000..9276bfa --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/nextflow_schema.json @@ -0,0 +1,726 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "featurecounts", +"description": "featureCounts is a read summarization program for counting reads generated from either RNA or genomic DNA sequencing experiments by implementing highly efficient chromosome hashing and feature blocking techniques. It works with either single or paired-end reads and provides a wide range of options appropriate for different sequencing applications.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "annotation": { + "type": + "string", + "description": "Type: `file`, required, example: `annotation.gtf`. Name of an annotation file", + "help_text": "Type: `file`, required, example: `annotation.gtf`. Name of an annotation file. GTF/GFF format by default. See \u0027--format\u0027 option for more format information.\n" + + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `input_file1.bam`, multiple_sep: `\";\"`. A list of SAM or BAM format files separated by semi-colon (;)", + "help_text": "Type: List of `file`, required, example: `input_file1.bam`, multiple_sep: `\";\"`. A list of SAM or BAM format files separated by semi-colon (;). They can be either name or location sorted. Location-sorted paired-end reads are automatically sorted by read names.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "counts": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.counts.tsv`, example: `features.tsv`. Name of output file including read counts in tab delimited format", + "help_text": "Type: `file`, required, default: `$id.$key.counts.tsv`, example: `features.tsv`. Name of output file including read counts in tab delimited format.\n" + , + "default":"$id.$key.counts.tsv" + } + + + , + "summary": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.summary.tsv`, example: `summary.tsv`. Summary statistics of counting results in tab delimited format", + "help_text": "Type: `file`, default: `$id.$key.summary.tsv`, example: `summary.tsv`. Summary statistics of counting results in tab delimited format.\n" + , + "default":"$id.$key.summary.tsv" + } + + + , + "junctions": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.junctions.txt`, example: `junctions.txt`. Count number of reads supporting each exon-exon junction", + "help_text": "Type: `file`, default: `$id.$key.junctions.txt`, example: `junctions.txt`. Count number of reads supporting each exon-exon junction. Junctions were identified from those exon-spanning reads in the input (containing \u0027N\u0027 in CIGAR string).\n" + , + "default":"$id.$key.junctions.txt" + } + + +} +}, + + + "annotation" : { + "title": "Annotation", + "type": "object", + "description": "No description", + "properties": { + + + "format": { + "type": + "string", + "description": "Type: `string`, example: `GTF`, choices: ``GTF`, `GFF`, `SAF``. Specify format of the provided annotation file", + "help_text": "Type: `string`, example: `GTF`, choices: ``GTF`, `GFF`, `SAF``. Specify format of the provided annotation file. Acceptable formats include \u0027GTF\u0027 (or compatible GFF format) and \u0027SAF\u0027. \u0027GTF\u0027 by default. \n", + "enum": ["GTF", "GFF", "SAF"] + + + } + + + , + "feature_type": { + "type": + "string", + "description": "Type: List of `string`, example: `exon`, multiple_sep: `\";\"`. Specify feature type(s) in a GTF annotation", + "help_text": "Type: List of `string`, example: `exon`, multiple_sep: `\";\"`. Specify feature type(s) in a GTF annotation. If multiple types are provided, they should be separated by \u0027;\u0027 with no space in between. \u0027exon\u0027 by default. Rows in the annotation with a matched feature will be extracted and used for read mapping.\n" + + } + + + , + "attribute_type": { + "type": + "string", + "description": "Type: `string`, example: `gene_id`. Specify attribute type in GTF annotation", + "help_text": "Type: `string`, example: `gene_id`. Specify attribute type in GTF annotation. \u0027gene_id\u0027 by default. Meta-features used for read counting will be extracted from annotation using the provided value.\n" + + } + + + , + "extra_attributes": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. Extract extra attribute types from the provided GTF annotation and include them in the counting output", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. Extract extra attribute types from the provided GTF annotation and include them in the counting output. These attribute types will not be used to group features. If more than one attribute type is provided they should be separated by semicolon (;).\n" + + } + + + , + "chrom_alias": { + "type": + "string", + "description": "Type: `file`, example: `chrom_alias.csv`. Provide a chromosome name alias file to match chr names in annotation with those in the reads", + "help_text": "Type: `file`, example: `chrom_alias.csv`. Provide a chromosome name alias file to match chr names in annotation with those in the reads. This should be a two-column comma-delimited text file. Its first column should include chr names in the annotation and its second column should include chr names in the reads. Chr names are case sensitive. No column header should be included in the file.\n" + + } + + +} +}, + + + "level of summarization" : { + "title": "Level of summarization", + "type": "object", + "description": "No description", + "properties": { + + + "feature_level": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Perform read counting at feature level (eg", + "help_text": "Type: `boolean_true`, default: `false`. Perform read counting at feature level (eg. counting reads for exons rather than genes).\n" + , + "default":false + } + + +} +}, + + + "overlap between reads and features" : { + "title": "Overlap between reads and features", + "type": "object", + "description": "No description", + "properties": { + + + "overlapping": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Assign reads to all their overlapping meta-features (or features if \u0027--feature_level\u0027 is specified)", + "help_text": "Type: `boolean_true`, default: `false`. Assign reads to all their overlapping meta-features (or features if \u0027--feature_level\u0027 is specified).\n" + , + "default":false + } + + + , + "min_overlap": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Minimum number of overlapping bases in a read that is required for read assignment", + "help_text": "Type: `integer`, example: `1`. Minimum number of overlapping bases in a read that is required for read assignment. 1 by default. Number of overlapping bases is counted from both reads if paired end. If a negative value is provided, then a gap of up to specified size will be allowed between read and the feature that the read is assigned to.\n" + + } + + + , + "frac_overlap": { + "type": + "number", + "description": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a read that is required for read assignment", + "help_text": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a read that is required for read assignment. Value should be within range [0,1]. 0 by default. Number of overlapping bases is counted from both reads if paired end. Both this option and \u0027--min_overlap\u0027 option need to be satisfied for read assignment.\n" + + } + + + , + "frac_overlap_feature": { + "type": + "number", + "description": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a feature that is required for read assignment", + "help_text": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a feature that is required for read assignment. Value should be within range [0,1]. 0 by default.\n" + + } + + + , + "largest_overlap": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Assign reads to a meta-feature/feature that has the largest number of overlapping bases", + "help_text": "Type: `boolean_true`, default: `false`. Assign reads to a meta-feature/feature that has the largest number of overlapping bases.\n" + , + "default":false + } + + + , + "non_overlap": { + "type": + "integer", + "description": "Type: `integer`. Maximum number of non-overlapping bases in a read (or a read pair) that is allowed when being assigned to a feature", + "help_text": "Type: `integer`. Maximum number of non-overlapping bases in a read (or a read pair) that is allowed when being assigned to a feature. No limit is set by default.\n" + + } + + + , + "non_overlap_feature": { + "type": + "integer", + "description": "Type: `integer`. Maximum number of non-overlapping bases in a feature that is allowed in read assignment", + "help_text": "Type: `integer`. Maximum number of non-overlapping bases in a feature that is allowed in read assignment. No limit is set by default.\n" + + } + + + , + "read_extension5": { + "type": + "integer", + "description": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 5\u0027 end", + "help_text": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 5\u0027 end.\n" + + } + + + , + "read_extension3": { + "type": + "integer", + "description": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 3\u0027 end", + "help_text": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 3\u0027 end.\n" + + } + + + , + "read2pos": { + "type": + "integer", + "description": "Type: `integer`, choices: ``3`, `5``. Reduce reads to their 5\u0027 most base or 3\u0027 most base", + "help_text": "Type: `integer`, choices: ``3`, `5``. Reduce reads to their 5\u0027 most base or 3\u0027 most base. Read counting is then performed based on the single base the read is reduced to.\n", + "enum": [3, 5] + + + } + + +} +}, + + + "multi-mapping reads" : { + "title": "Multi-mapping reads", + "type": "object", + "description": "No description", + "properties": { + + + "multi_mapping": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Multi-mapping reads will also be counted", + "help_text": "Type: `boolean_true`, default: `false`. Multi-mapping reads will also be counted. For a multi-mapping read, all its reported alignments will be counted. The \u0027NH\u0027 tag in BAM/SAM input is used to detect multi-mapping reads.\n" + , + "default":false + } + + +} +}, + + + "fractional counting" : { + "title": "Fractional counting", + "type": "object", + "description": "No description", + "properties": { + + + "fraction": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Assign fractional counts to features", + "help_text": "Type: `boolean_true`, default: `false`. Assign fractional counts to features. This option must be used together with \u0027--multi_mapping\u0027 or \u0027--overlapping\u0027 or both. When \u0027--multi_mapping\u0027 is specified, each reported alignment from a multi-mapping read (identified via \u0027NH\u0027 tag) will carry a fractional count of 1/x, instead of 1 (one), where x is the total number of alignments reported for the same read. When \u0027--overlapping\u0027 is specified, each overlapping feature will receive a fractional count of 1/y, where y is the total number of features overlapping with the read. When both \u0027--multi_mapping\u0027 and \u0027--overlapping\u0027 are specified, each alignment will carry a fractional count of 1/(x*y).\n" + , + "default":false + } + + +} +}, + + + "read filtering" : { + "title": "Read filtering", + "type": "object", + "description": "No description", + "properties": { + + + "min_map_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. The minimum mapping quality score a read must satisfy in order to be counted", + "help_text": "Type: `integer`, example: `0`. The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default.\n" + + } + + + , + "split_only": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Count split alignments only (ie", + "help_text": "Type: `boolean_true`, default: `false`. Count split alignments only (ie. alignments with CIGAR string containing \u0027N\u0027). An example of split alignments is exon-spanning reads in RNA-seq data.\n" + , + "default":false + } + + + , + "non_split_only": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If specified, only non-split alignments (CIGAR strings do not contain letter \u0027N\u0027) will be counted", + "help_text": "Type: `boolean_true`, default: `false`. If specified, only non-split alignments (CIGAR strings do not contain letter \u0027N\u0027) will be counted. All the other alignments will be ignored.\n" + , + "default":false + } + + + , + "primary": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Count primary alignments only", + "help_text": "Type: `boolean_true`, default: `false`. Count primary alignments only. Primary alignments are identified using bit 0x100 in SAM/BAM FLAG field.\n" + , + "default":false + } + + + , + "ignore_dup": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore duplicate reads in read counting", + "help_text": "Type: `boolean_true`, default: `false`. Ignore duplicate reads in read counting. Duplicate reads are identified using bit Ox400 in BAM/SAM FLAG field. The whole read pair is ignored if one of the reads is a duplicate read for paired end data.\n" + , + "default":false + } + + +} +}, + + + "strandedness" : { + "title": "Strandedness", + "type": "object", + "description": "No description", + "properties": { + + + "strand": { + "type": + "integer", + "description": "Type: `integer`, example: `0`, choices: ``0`, `1`, `2``. Perform strand-specific read counting", + "help_text": "Type: `integer`, example: `0`, choices: ``0`, `1`, `2``. Perform strand-specific read counting. A single integer value (applied to all input files) should be provided. Possible values include: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). Default value is 0 (ie. unstranded read counting carried out for all input files).\n", + "enum": [0, 1, 2] + + + } + + +} +}, + + + "exon-exon junctions" : { + "title": "Exon-exon junctions", + "type": "object", + "description": "No description", + "properties": { + + + "ref_fasta": { + "type": + "string", + "description": "Type: `file`, example: `reference.fasta`. Provide the name of a FASTA-format file that contains the reference sequences used in read mapping that produced the provided SAM/BAM files", + "help_text": "Type: `file`, example: `reference.fasta`. Provide the name of a FASTA-format file that contains the reference sequences used in read mapping that produced the provided SAM/BAM files.\n" + + } + + +} +}, + + + "parameters specific to paired end reads" : { + "title": "Parameters specific to paired end reads", + "type": "object", + "description": "No description", + "properties": { + + + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Specify that input data contain paired-end reads", + "help_text": "Type: `boolean_true`, default: `false`. Specify that input data contain paired-end reads. To perform fragment counting (ie. counting read pairs), the \u0027--countReadPairs\u0027 parameter should also be specified in addition to this parameter.\n" + , + "default":false + } + + + , + "count_read_pairs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads", + "help_text": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads. This option is only applicable for paired-end reads.\n" + , + "default":false + } + + + , + "both_aligned": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads", + "help_text": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads. This option is only applicable for paired-end reads.\n" + , + "default":false + } + + + , + "check_pe_dist": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Check validity of paired-end distance when counting read pairs", + "help_text": "Type: `boolean_true`, default: `false`. Check validity of paired-end distance when counting read pairs. Use \u0027--min_length\u0027 and \u0027--max_length\u0027 to set thresholds.\n" + , + "default":false + } + + + , + "min_length": { + "type": + "integer", + "description": "Type: `integer`, example: `50`. Minimum fragment/template length, 50 by default", + "help_text": "Type: `integer`, example: `50`. Minimum fragment/template length, 50 by default.\n" + + } + + + , + "max_length": { + "type": + "integer", + "description": "Type: `integer`, example: `600`. Maximum fragment/template length, 600 by default", + "help_text": "Type: `integer`, example: `600`. Maximum fragment/template length, 600 by default.\n" + + } + + + , + "same_strand": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not count read pairs that have their two ends mapping to different chromosomes or mapping to same chromosome but on different strands", + "help_text": "Type: `boolean_true`, default: `false`. Do not count read pairs that have their two ends mapping to different chromosomes or mapping to same chromosome but on different strands.\n" + , + "default":false + } + + + , + "donotsort": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not sort reads in BAM/SAM input", + "help_text": "Type: `boolean_true`, default: `false`. Do not sort reads in BAM/SAM input. Note that reads from the same pair are required to be located next to each other in the input.\n" + , + "default":false + } + + +} +}, + + + "read groups" : { + "title": "Read groups", + "type": "object", + "description": "No description", + "properties": { + + + "by_read_group": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Assign reads by read group", + "help_text": "Type: `boolean_true`, default: `false`. Assign reads by read group. \"RG\" tag is required to be present in the input BAM/SAM files.\n" + , + "default":false + } + + +} +}, + + + "long reads" : { + "title": "Long reads", + "type": "object", + "description": "No description", + "properties": { + + + "long_reads": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Count long reads such as Nanopore and PacBio reads", + "help_text": "Type: `boolean_true`, default: `false`. Count long reads such as Nanopore and PacBio reads. Long read counting can only run in one thread and only reads (not read-pairs) can be counted. There is no limitation on the number of \u0027M\u0027 operations allowed in a CIGAR string in long read counting.\n" + , + "default":false + } + + +} +}, + + + "assignment results for each read" : { + "title": "Assignment results for each read", + "type": "object", + "description": "No description", + "properties": { + + + "detailed_results": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results", + "help_text": "Type: `file`, default: `$id.$key.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results. Use `--detailed_results_format` to determine the format of the detailed results.\n" + , + "default":"$id.$key.detailed_results" + } + + + , + "detailed_results_format": { + "type": + "string", + "description": "Type: `string`, choices: ``CORE`, `SAM`, `BAM``. Output detailed assignment results for each read or read-pair", + "help_text": "Type: `string`, choices: ``CORE`, `SAM`, `BAM``. Output detailed assignment results for each read or read-pair. Results are saved to a file that is in one of the following formats: CORE, SAM and BAM. See documentaiton for more info about these formats.\n", + "enum": ["CORE", "SAM", "BAM"] + + + } + + +} +}, + + + "miscellaneous" : { + "title": "Miscellaneous", + "type": "object", + "description": "No description", + "properties": { + + + "max_M_op": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. Maximum number of \u0027M\u0027 operations allowed in a CIGAR string", + "help_text": "Type: `integer`, example: `10`. Maximum number of \u0027M\u0027 operations allowed in a CIGAR string. 10 by default. Both \u0027X\u0027 and \u0027=\u0027 are treated as \u0027M\u0027 and adjacent \u0027M\u0027 operations are merged in the CIGAR string.\n" + + } + + + , + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output verbose information for debugging, such as un-matched chromosome/contig names", + "help_text": "Type: `boolean_true`, default: `false`. Output verbose information for debugging, such as un-matched chromosome/contig names.\n" + , + "default":false + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/annotation" + }, + + { + "$ref": "#/definitions/level of summarization" + }, + + { + "$ref": "#/definitions/overlap between reads and features" + }, + + { + "$ref": "#/definitions/multi-mapping reads" + }, + + { + "$ref": "#/definitions/fractional counting" + }, + + { + "$ref": "#/definitions/read filtering" + }, + + { + "$ref": "#/definitions/strandedness" + }, + + { + "$ref": "#/definitions/exon-exon junctions" + }, + + { + "$ref": "#/definitions/parameters specific to paired end reads" + }, + + { + "$ref": "#/definitions/read groups" + }, + + { + "$ref": "#/definitions/long reads" + }, + + { + "$ref": "#/definitions/assignment results for each read" + }, + + { + "$ref": "#/definitions/miscellaneous" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/.config.vsh.yaml new file mode 100644 index 0000000..66fbe8b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/.config.vsh.yaml @@ -0,0 +1,231 @@ +name: "fq_subsample" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input_1" + description: "First input fastq file to subsample. Accepts both raw and gzipped\ + \ FASTQ inputs." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_2" + description: "Second input fastq files to subsample. Accepts both raw and gzipped\ + \ FASTQ inputs." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_1" + description: "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_2" + description: "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "double" + name: "--probability" + description: "The probability a record is kept, as a percentage (0.0, 1.0). Cannot\ + \ be used with `record-count`" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--record_count" + description: "The exact number of records to keep. Cannot be used with `probability`" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed" + description: "Seed to use for the random number generator" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "fq subsample outputs a subset of records from single or paired FASTQ\ + \ files." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "fastq" +- "subsample" +- "subset" +license: "MIT" +links: + repository: "https://github.com/stjude-rust-labs/fq" + homepage: "https://github.com/stjude-rust-labs/fq/blob/master/README.md" + documentation: "https://github.com/stjude-rust-labs/fq/blob/master/README.md" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "rust:1.81-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && apt-get install -y git procps && \\\ngit clone --depth 1\ + \ --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\ncd fq &&\ + \ \\\ncargo install --locked --path . && \\\nmv target/release/fq /usr/local/bin/\ + \ && \\\ncd / && rm -rf /fq\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/fq_subsample/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/fq_subsample" + executable: "target/nextflow/fq_subsample/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/main.nf new file mode 100644 index 0000000..843ed0e --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/main.nf @@ -0,0 +1,3860 @@ +// fq_subsample v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "fq_subsample", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input_1", + "description" : "First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_2", + "description" : "Second input fastq files to subsample. Accepts both raw and gzipped FASTQ inputs.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_1", + "description" : "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_2", + "description" : "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "double", + "name" : "--probability", + "description" : "The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--record_count", + "description" : "The exact number of records to keep. Cannot be used with `probability`", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed", + "description" : "Seed to use for the random number generator", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "fq subsample outputs a subset of records from single or paired FASTQ files.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "fastq", + "subsample", + "subset" + ], + "license" : "MIT", + "links" : { + "repository" : "https://github.com/stjude-rust-labs/fq", + "homepage" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md", + "documentation" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "rust:1.81-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && apt-get install -y git procps && \\\\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\\\ncd fq && \\\\\ncargo install --locked --path . && \\\\\nmv target/release/fq /usr/local/bin/ && \\\\\ncd / && rm -rf /fq\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/fq_subsample/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/fq_subsample", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT_1+x} ]; then echo "${VIASH_PAR_INPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_1='&'#" ; else echo "# par_input_1="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_2+x} ]; then echo "${VIASH_PAR_INPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_2='&'#" ; else echo "# par_input_2="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_1+x} ]; then echo "${VIASH_PAR_OUTPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_1='&'#" ; else echo "# par_output_1="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_2+x} ]; then echo "${VIASH_PAR_OUTPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_2='&'#" ; else echo "# par_output_2="; fi ) +$( if [ ! -z ${VIASH_PAR_PROBABILITY+x} ]; then echo "${VIASH_PAR_PROBABILITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_probability='&'#" ; else echo "# par_probability="; fi ) +$( if [ ! -z ${VIASH_PAR_RECORD_COUNT+x} ]; then echo "${VIASH_PAR_RECORD_COUNT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_record_count='&'#" ; else echo "# par_record_count="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + + +required_args=("-p" "--probability" "-n" "--record_count") + +# exclusive OR for required arguments \\$par_probability and \\$par_record_count +if [[ -n \\$par_probability && -n \\$par_record_count ]] || [[ -z \\$par_probability && -z \\$par_record_count ]]; then + echo "FQ/SUBSAMPLE requires either --probability or --record_count to be specified" + exit 1 +fi + + +fq subsample \\\\ + \\${par_output_1:+--r1-dst "\\${par_output_1}"} \\\\ + \\${par_output_2:+--r2-dst "\\${par_output_2}"} \\\\ + \\${par_probability:+--probability "\\${par_probability}"} \\\\ + \\${par_record_count:+--record-count "\\${par_record_count}"} \\\\ + \\${par_seed:+--seed "\\${par_seed}"} \\\\ + \\${par_input_1} \\\\ + \\${par_input_2} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/fq_subsample", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow.config new file mode 100644 index 0000000..05758bd --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'fq_subsample' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'fq subsample outputs a subset of records from single or paired FASTQ files.' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow_schema.json new file mode 100644 index 0000000..83a6f61 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/nextflow_schema.json @@ -0,0 +1,160 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "fq_subsample", +"description": "fq subsample outputs a subset of records from single or paired FASTQ files.", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input_1": { + "type": + "string", + "description": "Type: `file`, required. First input fastq file to subsample", + "help_text": "Type: `file`, required. First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs." + + } + + + , + "input_2": { + "type": + "string", + "description": "Type: `file`. Second input fastq files to subsample", + "help_text": "Type: `file`. Second input fastq files to subsample. Accepts both raw and gzipped FASTQ inputs." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_1`. Sampled read 1 fastq files", + "help_text": "Type: `file`, default: `$id.$key.output_1`. Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`." + , + "default":"$id.$key.output_1" + } + + + , + "output_2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_2`. Sampled read 2 fastq files", + "help_text": "Type: `file`, default: `$id.$key.output_2`. Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`." + , + "default":"$id.$key.output_2" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "probability": { + "type": + "number", + "description": "Type: `double`. The probability a record is kept, as a percentage (0", + "help_text": "Type: `double`. The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`" + + } + + + , + "record_count": { + "type": + "integer", + "description": "Type: `integer`. The exact number of records to keep", + "help_text": "Type: `integer`. The exact number of records to keep. Cannot be used with `probability`" + + } + + + , + "seed": { + "type": + "integer", + "description": "Type: `integer`. Seed to use for the random number generator", + "help_text": "Type: `integer`. Seed to use for the random number generator" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/.config.vsh.yaml new file mode 100644 index 0000000..7fb3826 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/.config.vsh.yaml @@ -0,0 +1,726 @@ +name: "gffread" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "A reference file in either the GFF3, GFF2 or GTF format.\n" + info: null + example: + - "annotation.gff" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chr_mapping" + alternatives: + - "-m" + description: " is a name mapping table for converting reference sequence\ + \ names, \nhaving this 2-column format: .\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--seq_info" + alternatives: + - "-s" + description: " is a tab-delimited file providing this info for\ + \ each of the mapped \nsequences: \ + \ (useful for --description option with \nmRNA/EST/protein mappings).\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome" + alternatives: + - "-g" + description: "Full path to a multi-fasta file with the genomic sequences for all\ + \ input mappings, \nOR a directory with single-fasta files (one per genomic\ + \ sequence, with file names \nmatching sequence names).\n" + info: null + example: + - "genome.fa" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--outfile" + alternatives: + - "-o" + description: "Write the output records into .\n" + info: null + example: + - "output.gff" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--force_exons" + description: "Make sure that the lowest level GFF features are considered \"exon\"\ + \ features.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--gene2exon" + description: "For single-line genes not parenting any transcripts, add an exon\ + \ feature spanning \nthe entire gene (treat it as a transcript).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--t_adopt" + description: "Try to find a parent gene overlapping/containing a transcript that\ + \ does not have \nany explicit gene Parent.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--decode" + alternatives: + - "-D" + description: "Decode url encoded characters within attributes.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--merge_exons" + alternatives: + - "-Z" + description: "Merge very close exons into a single exon (when intron size<4).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--junctions" + alternatives: + - "-j" + description: "Output the junctions and the corresponding transcripts.\n" + info: null + direction: "input" + - type: "file" + name: "--spliced_exons" + alternatives: + - "-w" + description: "Write a fasta file with spliced exons for each transcript.\n" + info: null + example: + - "exons.fa" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--w_add" + description: "For the --spliced_exons option, extract additional bases both\ + \ upstream and \ndownstream of the transcript boundaries.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--w_nocds" + description: "For --spliced_exons, disable the output of CDS info in the FASTA\ + \ file.\n" + info: null + direction: "input" + - type: "file" + name: "--spliced_cds" + alternatives: + - "-x" + description: "Write a fasta file with spliced CDS for each GFF transcript.\n" + info: null + example: + - "cds.fa" + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tr_cds" + alternatives: + - "-y" + description: "Write a protein fasta file with the translation of CDS for each\ + \ record.\n" + info: null + example: + - "tr_cds.fa" + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--w_coords" + alternatives: + - "-W" + description: "For --spliced_exons, --spliced_cds and -tr_cds options, write in\ + \ the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--stop_dot" + alternatives: + - "-S" + description: "For --tr_cds option, use '*' instead of '.' as stop codon translation.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--id_version" + alternatives: + - "-L" + description: "Ensembl GTF to GFF3 conversion, adds version to IDs.\n" + info: null + direction: "input" + - type: "string" + name: "--trackname" + alternatives: + - "-t" + description: "Use in the 2nd column of each GFF/GTF output line.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--gtf_output" + alternatives: + - "-T" + description: "Main output will be GTF instead of GFF3.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--bed" + description: "Output records in BED format instead of default GFF3.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--tlf" + description: "Output \"transcript line format\" which is like GFF but with exons\ + \ and CDS related \nfeatures stored as GFF attributes in the transcript feature\ + \ line, like this:\n exoncount=N;exons=;CDSphase=;CDS=\n\ + is a comma-delimited list of exon_start-exon_end coordinates;\n\ + \ is CDS_start:CDS_end coordinates or a list like .\n" + info: null + direction: "input" + - type: "string" + name: "--table" + description: "Output a simple tab delimited format instead of GFF, with columns\ + \ having the values \nof GFF attributes given in ; special pseudo-attributes\ + \ (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand,\ + \ @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds\ + \ FASTA output files are enabled, the \nsame fields (excluding @id) are appended\ + \ to the definition line of corresponding FASTA\nrecords.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--expose_dups" + alternatives: + - "-E" + - "-v" + description: "Expose (warn about) duplicate transcript IDs and other potential\ + \ problems with the \ngiven GFF/GTF records.\n" + info: null + direction: "input" +- name: "Options" + arguments: + - type: "file" + name: "--ids" + description: "Discard records/transcripts if their IDs are not listed in .\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--nids" + description: "Discard records/transcripts if their IDs are listed in .\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--maxintron" + alternatives: + - "-i" + description: "Discard transcripts having an intron larger than .\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--minlen" + alternatives: + - "-l" + description: "Discard transcripts shorter than bases.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--range" + alternatives: + - "-r" + description: "Only show transcripts overlapping coordinate range ..\ + \ (on chromosome/contig \n, strand if provided).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--strict_range" + alternatives: + - "-R" + description: "For --range option, discard all transcripts that are not fully contained\ + \ within the given \nrange.\n" + info: null + direction: "input" + - type: "string" + name: "--jmatch" + description: "Only output transcripts matching the given junction.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--no_single_exon" + alternatives: + - "-U" + description: "Discard single-exon transcripts.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--coding" + alternatives: + - "-C" + description: "Coding only: discard mRNAs that have no CDS features.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--nc" + description: "Non-coding only: discard mRNAs that have CDS features.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--ignore_locus" + description: "Discard locus features and attributes found in the input.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--description" + alternatives: + - "-A" + description: "Use the description field from and add it as the\ + \ value for a 'descr' \nattribute to the GFF record.\n" + info: null + direction: "input" +- name: "Sorting" + arguments: + - type: "boolean_true" + name: "--sort_alpha" + description: "Chromosomes (reference sequences) are sorted alphabetically.\n" + info: null + direction: "input" + - type: "file" + name: "--sort_by" + description: "Sort the reference sequences by the order in which their names are\ + \ given in the \n file.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Misc options" + arguments: + - type: "boolean_true" + name: "--keep_attrs" + alternatives: + - "-F" + description: "Keep all GFF attributes (for non-exon features).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--keep_exon_attrs" + description: "For -F option, do not attempt to reduce redundant exon/CDS attributes.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_exon_attrs" + alternatives: + - "-G" + description: "Do not keep exon attributes, move them to the transcript feature\ + \ (for GFF3 output).\n" + info: null + direction: "input" + - type: "string" + name: "--attrs" + description: "Only output the GTF/GFF attributes listed in which is\ + \ a comma delimited \nlist of attribute names to.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--keep_genes" + description: "In transcript-only mode (default), also preserve gene records.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--keep_comments" + description: "For GFF3 input/output, try to preserve comments.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--process_other" + alternatives: + - "-O" + description: "process other non-transcript GFF records (by default non-transcript\ + \ records are ignored).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--rm_stop_codons" + alternatives: + - "-V" + description: "Discard any mRNAs with CDS having in-frame stop codons (requires\ + \ --genome).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--adj_cds_start" + alternatives: + - "-H" + description: "For --rm_stop_codons option, check and adjust the starting CDS phase\ + \ if the original phase\nleads to a translation with an in-frame stop codon.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--opposite_strand" + alternatives: + - "-B" + description: "For -V option, single-exon transcripts are also checked on the opposite\ + \ strand (requires \n--genome). \n" + info: null + direction: "input" + - type: "boolean_true" + name: "--coding_status" + alternatives: + - "-P" + description: "Add transcript level GFF attributes about the coding status of each\ + \ transcript, including \npartialness or in-frame stop codons (requires --genome).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--add_hasCDS" + description: "Add a \"hasCDS\" attribute with value \"true\" for transcripts that\ + \ have CDS features. \n" + info: null + direction: "input" + - type: "boolean_true" + name: "--adj_stop" + description: "Stop codon adjustment: enables --coding_status and performs automatic\ + \ adjustment of the CDS stop \ncoordinate if premature or downstream.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--rm_noncanon" + alternatives: + - "-N" + description: "Discard multi-exon mRNAs that have any intron with a non-canonical\ + \ splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--complete_cds" + alternatives: + - "-J" + description: "Discard any mRNAs that either lack initial START codon or the terminal\ + \ STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a\ + \ complete CDS).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_pseudo" + description: "Filter out records matching the 'pseudo' keyword.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--in_bed" + description: "Input should be parsed as BED format (automatic if the input filename\ + \ ends with .bed*).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--in_tlf" + description: "Input GFF-like one-line-per-transcript format without exon/CDS features\ + \ (see --tlf option \nbelow); automatic if the input filename ends with .tlf).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--stream" + description: "Fast processing of input GFF/BED transcripts as they are received\ + \ (no sorting, exons must \nbe grouped by transcript in the input data).\n" + info: null + direction: "input" +- name: "Clustering" + arguments: + - type: "boolean_true" + name: "--merge" + alternatives: + - "-M" + description: "Cluster the input transcripts into loci, discarding \"redundant\"\ + \ transcripts (those with \nthe same exact introns and fully contained or equal\ + \ boundaries).\n" + info: null + direction: "input" + - type: "file" + name: "--dupinfo" + alternatives: + - "-d" + description: "For --merge option, write duplication info to file .\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--cluster_only" + description: "Same as --merge but without discarding any of the \"duplicate\"\ + \ transcripts, only create \n\"locus\" features.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--rm_redundant" + alternatives: + - "-K" + description: "For --merge option: also discard as redundant the shorter, fully\ + \ contained transcripts (intron \nchains matching a part of the container).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_boundary" + alternatives: + - "-Q" + description: "For --merge option, no longer require boundary containment when\ + \ assessing redundancy (can be \ncombined with --rm_redundant); only introns\ + \ have to match for multi-exon transcripts, and >=80%\noverlap for single-exon\ + \ transcripts.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_overlap" + alternatives: + - "-Y" + description: "For --merge option, enforce --no_boundary but also discard overlapping\ + \ single-exon transcripts,\neven on the opposite strand (can be combined with\ + \ --rm_redudant).\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Validate, filter, convert and perform various other operations on GFF\ + \ files." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "gff" +- "conversion" +- "validation" +- "filtering" +license: "MIT" +references: + doi: + - "10.12688/f1000research.23297.2" +links: + repository: "https://github.com/gpertea/gffread" + homepage: "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread" + documentation: "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/gffread:0.12.7--hdcf5f25_3" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "echo \"gffread: \\\"$(gffread --version 2>&1)\\\"\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/gffread/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/gffread" + executable: "target/nextflow/gffread/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/main.nf new file mode 100644 index 0000000..73d6844 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/main.nf @@ -0,0 +1,4593 @@ +// gffread v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "gffread", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "A reference file in either the GFF3, GFF2 or GTF format.\n", + "example" : [ + "annotation.gff" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chr_mapping", + "alternatives" : [ + "-m" + ], + "description" : " is a name mapping table for converting reference sequence names, \nhaving this 2-column format: .\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--seq_info", + "alternatives" : [ + "-s" + ], + "description" : " is a tab-delimited file providing this info for each of the mapped \nsequences: (useful for --description option with \nmRNA/EST/protein mappings).\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome", + "alternatives" : [ + "-g" + ], + "description" : "Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names).\n", + "example" : [ + "genome.fa" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--outfile", + "alternatives" : [ + "-o" + ], + "description" : "Write the output records into .\n", + "example" : [ + "output.gff" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--force_exons", + "description" : "Make sure that the lowest level GFF features are considered \\"exon\\" features.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--gene2exon", + "description" : "For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--t_adopt", + "description" : "Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--decode", + "alternatives" : [ + "-D" + ], + "description" : "Decode url encoded characters within attributes.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--merge_exons", + "alternatives" : [ + "-Z" + ], + "description" : "Merge very close exons into a single exon (when intron size<4).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--junctions", + "alternatives" : [ + "-j" + ], + "description" : "Output the junctions and the corresponding transcripts.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--spliced_exons", + "alternatives" : [ + "-w" + ], + "description" : "Write a fasta file with spliced exons for each transcript.\n", + "example" : [ + "exons.fa" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--w_add", + "description" : "For the --spliced_exons option, extract additional bases both upstream and \ndownstream of the transcript boundaries.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--w_nocds", + "description" : "For --spliced_exons, disable the output of CDS info in the FASTA file.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--spliced_cds", + "alternatives" : [ + "-x" + ], + "description" : "Write a fasta file with spliced CDS for each GFF transcript.\n", + "example" : [ + "cds.fa" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tr_cds", + "alternatives" : [ + "-y" + ], + "description" : "Write a protein fasta file with the translation of CDS for each record.\n", + "example" : [ + "tr_cds.fa" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--w_coords", + "alternatives" : [ + "-W" + ], + "description" : "For --spliced_exons, --spliced_cds and -tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--stop_dot", + "alternatives" : [ + "-S" + ], + "description" : "For --tr_cds option, use '*' instead of '.' as stop codon translation.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--id_version", + "alternatives" : [ + "-L" + ], + "description" : "Ensembl GTF to GFF3 conversion, adds version to IDs.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--trackname", + "alternatives" : [ + "-t" + ], + "description" : "Use in the 2nd column of each GFF/GTF output line.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--gtf_output", + "alternatives" : [ + "-T" + ], + "description" : "Main output will be GTF instead of GFF3.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--bed", + "description" : "Output records in BED format instead of default GFF3.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--tlf", + "description" : "Output \\"transcript line format\\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n exoncount=N;exons=;CDSphase=;CDS=\n is a comma-delimited list of exon_start-exon_end coordinates;\n is CDS_start:CDS_end coordinates or a list like .\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--table", + "description" : "Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in ; special pseudo-attributes (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--expose_dups", + "alternatives" : [ + "-E", + "-v" + ], + "description" : "Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "file", + "name" : "--ids", + "description" : "Discard records/transcripts if their IDs are not listed in .\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--nids", + "description" : "Discard records/transcripts if their IDs are listed in .\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--maxintron", + "alternatives" : [ + "-i" + ], + "description" : "Discard transcripts having an intron larger than .\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--minlen", + "alternatives" : [ + "-l" + ], + "description" : "Discard transcripts shorter than bases.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--range", + "alternatives" : [ + "-r" + ], + "description" : "Only show transcripts overlapping coordinate range .. (on chromosome/contig \n, strand if provided).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--strict_range", + "alternatives" : [ + "-R" + ], + "description" : "For --range option, discard all transcripts that are not fully contained within the given \nrange.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--jmatch", + "description" : "Only output transcripts matching the given junction.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--no_single_exon", + "alternatives" : [ + "-U" + ], + "description" : "Discard single-exon transcripts.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--coding", + "alternatives" : [ + "-C" + ], + "description" : "Coding only: discard mRNAs that have no CDS features.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nc", + "description" : "Non-coding only: discard mRNAs that have CDS features.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--ignore_locus", + "description" : "Discard locus features and attributes found in the input.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--description", + "alternatives" : [ + "-A" + ], + "description" : "Use the description field from and add it as the value for a 'descr' \nattribute to the GFF record.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Sorting", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--sort_alpha", + "description" : "Chromosomes (reference sequences) are sorted alphabetically.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--sort_by", + "description" : "Sort the reference sequences by the order in which their names are given in the \n file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Misc options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--keep_attrs", + "alternatives" : [ + "-F" + ], + "description" : "Keep all GFF attributes (for non-exon features).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--keep_exon_attrs", + "description" : "For -F option, do not attempt to reduce redundant exon/CDS attributes.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_exon_attrs", + "alternatives" : [ + "-G" + ], + "description" : "Do not keep exon attributes, move them to the transcript feature (for GFF3 output).\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--attrs", + "description" : "Only output the GTF/GFF attributes listed in which is a comma delimited \nlist of attribute names to.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--keep_genes", + "description" : "In transcript-only mode (default), also preserve gene records.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--keep_comments", + "description" : "For GFF3 input/output, try to preserve comments.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--process_other", + "alternatives" : [ + "-O" + ], + "description" : "process other non-transcript GFF records (by default non-transcript records are ignored).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--rm_stop_codons", + "alternatives" : [ + "-V" + ], + "description" : "Discard any mRNAs with CDS having in-frame stop codons (requires --genome).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--adj_cds_start", + "alternatives" : [ + "-H" + ], + "description" : "For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--opposite_strand", + "alternatives" : [ + "-B" + ], + "description" : "For -V option, single-exon transcripts are also checked on the opposite strand (requires \n--genome). \n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--coding_status", + "alternatives" : [ + "-P" + ], + "description" : "Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--add_hasCDS", + "description" : "Add a \\"hasCDS\\" attribute with value \\"true\\" for transcripts that have CDS features. \n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--adj_stop", + "description" : "Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--rm_noncanon", + "alternatives" : [ + "-N" + ], + "description" : "Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--complete_cds", + "alternatives" : [ + "-J" + ], + "description" : "Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a complete CDS).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_pseudo", + "description" : "Filter out records matching the 'pseudo' keyword.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--in_bed", + "description" : "Input should be parsed as BED format (automatic if the input filename ends with .bed*).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--in_tlf", + "description" : "Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option \nbelow); automatic if the input filename ends with .tlf).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--stream", + "description" : "Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Clustering", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--merge", + "alternatives" : [ + "-M" + ], + "description" : "Cluster the input transcripts into loci, discarding \\"redundant\\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries).\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--dupinfo", + "alternatives" : [ + "-d" + ], + "description" : "For --merge option, write duplication info to file .\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--cluster_only", + "description" : "Same as --merge but without discarding any of the \\"duplicate\\" transcripts, only create \n\\"locus\\" features.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--rm_redundant", + "alternatives" : [ + "-K" + ], + "description" : "For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_boundary", + "alternatives" : [ + "-Q" + ], + "description" : "For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and >=80%\noverlap for single-exon transcripts.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_overlap", + "alternatives" : [ + "-Y" + ], + "description" : "For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redudant).\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Validate, filter, convert and perform various other operations on GFF files.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "gff", + "conversion", + "validation", + "filtering" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.12688/f1000research.23297.2" + ] + }, + "links" : { + "repository" : "https://github.com/gpertea/gffread", + "homepage" : "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread", + "documentation" : "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/gffread:0.12.7--hdcf5f25_3", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "echo \\"gffread: \\\\\\"$(gffread --version 2>&1)\\\\\\"\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/gffread/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/gffread", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_CHR_MAPPING+x} ]; then echo "${VIASH_PAR_CHR_MAPPING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chr_mapping='&'#" ; else echo "# par_chr_mapping="; fi ) +$( if [ ! -z ${VIASH_PAR_SEQ_INFO+x} ]; then echo "${VIASH_PAR_SEQ_INFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seq_info='&'#" ; else echo "# par_seq_info="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME+x} ]; then echo "${VIASH_PAR_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome='&'#" ; else echo "# par_genome="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTFILE+x} ]; then echo "${VIASH_PAR_OUTFILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_outfile='&'#" ; else echo "# par_outfile="; fi ) +$( if [ ! -z ${VIASH_PAR_FORCE_EXONS+x} ]; then echo "${VIASH_PAR_FORCE_EXONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_force_exons='&'#" ; else echo "# par_force_exons="; fi ) +$( if [ ! -z ${VIASH_PAR_GENE2EXON+x} ]; then echo "${VIASH_PAR_GENE2EXON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gene2exon='&'#" ; else echo "# par_gene2exon="; fi ) +$( if [ ! -z ${VIASH_PAR_T_ADOPT+x} ]; then echo "${VIASH_PAR_T_ADOPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_t_adopt='&'#" ; else echo "# par_t_adopt="; fi ) +$( if [ ! -z ${VIASH_PAR_DECODE+x} ]; then echo "${VIASH_PAR_DECODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_decode='&'#" ; else echo "# par_decode="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGE_EXONS+x} ]; then echo "${VIASH_PAR_MERGE_EXONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merge_exons='&'#" ; else echo "# par_merge_exons="; fi ) +$( if [ ! -z ${VIASH_PAR_JUNCTIONS+x} ]; then echo "${VIASH_PAR_JUNCTIONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_junctions='&'#" ; else echo "# par_junctions="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLICED_EXONS+x} ]; then echo "${VIASH_PAR_SPLICED_EXONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_spliced_exons='&'#" ; else echo "# par_spliced_exons="; fi ) +$( if [ ! -z ${VIASH_PAR_W_ADD+x} ]; then echo "${VIASH_PAR_W_ADD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_w_add='&'#" ; else echo "# par_w_add="; fi ) +$( if [ ! -z ${VIASH_PAR_W_NOCDS+x} ]; then echo "${VIASH_PAR_W_NOCDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_w_nocds='&'#" ; else echo "# par_w_nocds="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLICED_CDS+x} ]; then echo "${VIASH_PAR_SPLICED_CDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_spliced_cds='&'#" ; else echo "# par_spliced_cds="; fi ) +$( if [ ! -z ${VIASH_PAR_TR_CDS+x} ]; then echo "${VIASH_PAR_TR_CDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tr_cds='&'#" ; else echo "# par_tr_cds="; fi ) +$( if [ ! -z ${VIASH_PAR_W_COORDS+x} ]; then echo "${VIASH_PAR_W_COORDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_w_coords='&'#" ; else echo "# par_w_coords="; fi ) +$( if [ ! -z ${VIASH_PAR_STOP_DOT+x} ]; then echo "${VIASH_PAR_STOP_DOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stop_dot='&'#" ; else echo "# par_stop_dot="; fi ) +$( if [ ! -z ${VIASH_PAR_ID_VERSION+x} ]; then echo "${VIASH_PAR_ID_VERSION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id_version='&'#" ; else echo "# par_id_version="; fi ) +$( if [ ! -z ${VIASH_PAR_TRACKNAME+x} ]; then echo "${VIASH_PAR_TRACKNAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackname='&'#" ; else echo "# par_trackname="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF_OUTPUT+x} ]; then echo "${VIASH_PAR_GTF_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf_output='&'#" ; else echo "# par_gtf_output="; fi ) +$( if [ ! -z ${VIASH_PAR_BED+x} ]; then echo "${VIASH_PAR_BED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed='&'#" ; else echo "# par_bed="; fi ) +$( if [ ! -z ${VIASH_PAR_TLF+x} ]; then echo "${VIASH_PAR_TLF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tlf='&'#" ; else echo "# par_tlf="; fi ) +$( if [ ! -z ${VIASH_PAR_TABLE+x} ]; then echo "${VIASH_PAR_TABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_table='&'#" ; else echo "# par_table="; fi ) +$( if [ ! -z ${VIASH_PAR_EXPOSE_DUPS+x} ]; then echo "${VIASH_PAR_EXPOSE_DUPS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_expose_dups='&'#" ; else echo "# par_expose_dups="; fi ) +$( if [ ! -z ${VIASH_PAR_IDS+x} ]; then echo "${VIASH_PAR_IDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ids='&'#" ; else echo "# par_ids="; fi ) +$( if [ ! -z ${VIASH_PAR_NIDS+x} ]; then echo "${VIASH_PAR_NIDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nids='&'#" ; else echo "# par_nids="; fi ) +$( if [ ! -z ${VIASH_PAR_MAXINTRON+x} ]; then echo "${VIASH_PAR_MAXINTRON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_maxintron='&'#" ; else echo "# par_maxintron="; fi ) +$( if [ ! -z ${VIASH_PAR_MINLEN+x} ]; then echo "${VIASH_PAR_MINLEN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minlen='&'#" ; else echo "# par_minlen="; fi ) +$( if [ ! -z ${VIASH_PAR_RANGE+x} ]; then echo "${VIASH_PAR_RANGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_range='&'#" ; else echo "# par_range="; fi ) +$( if [ ! -z ${VIASH_PAR_STRICT_RANGE+x} ]; then echo "${VIASH_PAR_STRICT_RANGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strict_range='&'#" ; else echo "# par_strict_range="; fi ) +$( if [ ! -z ${VIASH_PAR_JMATCH+x} ]; then echo "${VIASH_PAR_JMATCH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_jmatch='&'#" ; else echo "# par_jmatch="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_SINGLE_EXON+x} ]; then echo "${VIASH_PAR_NO_SINGLE_EXON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_single_exon='&'#" ; else echo "# par_no_single_exon="; fi ) +$( if [ ! -z ${VIASH_PAR_CODING+x} ]; then echo "${VIASH_PAR_CODING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_coding='&'#" ; else echo "# par_coding="; fi ) +$( if [ ! -z ${VIASH_PAR_NC+x} ]; then echo "${VIASH_PAR_NC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nc='&'#" ; else echo "# par_nc="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_LOCUS+x} ]; then echo "${VIASH_PAR_IGNORE_LOCUS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_locus='&'#" ; else echo "# par_ignore_locus="; fi ) +$( if [ ! -z ${VIASH_PAR_DESCRIPTION+x} ]; then echo "${VIASH_PAR_DESCRIPTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_description='&'#" ; else echo "# par_description="; fi ) +$( if [ ! -z ${VIASH_PAR_SORT_ALPHA+x} ]; then echo "${VIASH_PAR_SORT_ALPHA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sort_alpha='&'#" ; else echo "# par_sort_alpha="; fi ) +$( if [ ! -z ${VIASH_PAR_SORT_BY+x} ]; then echo "${VIASH_PAR_SORT_BY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sort_by='&'#" ; else echo "# par_sort_by="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_ATTRS+x} ]; then echo "${VIASH_PAR_KEEP_ATTRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_attrs='&'#" ; else echo "# par_keep_attrs="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_EXON_ATTRS+x} ]; then echo "${VIASH_PAR_KEEP_EXON_ATTRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_exon_attrs='&'#" ; else echo "# par_keep_exon_attrs="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_EXON_ATTRS+x} ]; then echo "${VIASH_PAR_NO_EXON_ATTRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_exon_attrs='&'#" ; else echo "# par_no_exon_attrs="; fi ) +$( if [ ! -z ${VIASH_PAR_ATTRS+x} ]; then echo "${VIASH_PAR_ATTRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_attrs='&'#" ; else echo "# par_attrs="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_GENES+x} ]; then echo "${VIASH_PAR_KEEP_GENES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_genes='&'#" ; else echo "# par_keep_genes="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_COMMENTS+x} ]; then echo "${VIASH_PAR_KEEP_COMMENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_comments='&'#" ; else echo "# par_keep_comments="; fi ) +$( if [ ! -z ${VIASH_PAR_PROCESS_OTHER+x} ]; then echo "${VIASH_PAR_PROCESS_OTHER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_process_other='&'#" ; else echo "# par_process_other="; fi ) +$( if [ ! -z ${VIASH_PAR_RM_STOP_CODONS+x} ]; then echo "${VIASH_PAR_RM_STOP_CODONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rm_stop_codons='&'#" ; else echo "# par_rm_stop_codons="; fi ) +$( if [ ! -z ${VIASH_PAR_ADJ_CDS_START+x} ]; then echo "${VIASH_PAR_ADJ_CDS_START}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adj_cds_start='&'#" ; else echo "# par_adj_cds_start="; fi ) +$( if [ ! -z ${VIASH_PAR_OPPOSITE_STRAND+x} ]; then echo "${VIASH_PAR_OPPOSITE_STRAND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_opposite_strand='&'#" ; else echo "# par_opposite_strand="; fi ) +$( if [ ! -z ${VIASH_PAR_CODING_STATUS+x} ]; then echo "${VIASH_PAR_CODING_STATUS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_coding_status='&'#" ; else echo "# par_coding_status="; fi ) +$( if [ ! -z ${VIASH_PAR_ADD_HASCDS+x} ]; then echo "${VIASH_PAR_ADD_HASCDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_add_hasCDS='&'#" ; else echo "# par_add_hasCDS="; fi ) +$( if [ ! -z ${VIASH_PAR_ADJ_STOP+x} ]; then echo "${VIASH_PAR_ADJ_STOP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adj_stop='&'#" ; else echo "# par_adj_stop="; fi ) +$( if [ ! -z ${VIASH_PAR_RM_NONCANON+x} ]; then echo "${VIASH_PAR_RM_NONCANON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rm_noncanon='&'#" ; else echo "# par_rm_noncanon="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPLETE_CDS+x} ]; then echo "${VIASH_PAR_COMPLETE_CDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_complete_cds='&'#" ; else echo "# par_complete_cds="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_PSEUDO+x} ]; then echo "${VIASH_PAR_NO_PSEUDO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_pseudo='&'#" ; else echo "# par_no_pseudo="; fi ) +$( if [ ! -z ${VIASH_PAR_IN_BED+x} ]; then echo "${VIASH_PAR_IN_BED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_in_bed='&'#" ; else echo "# par_in_bed="; fi ) +$( if [ ! -z ${VIASH_PAR_IN_TLF+x} ]; then echo "${VIASH_PAR_IN_TLF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_in_tlf='&'#" ; else echo "# par_in_tlf="; fi ) +$( if [ ! -z ${VIASH_PAR_STREAM+x} ]; then echo "${VIASH_PAR_STREAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stream='&'#" ; else echo "# par_stream="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGE+x} ]; then echo "${VIASH_PAR_MERGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merge='&'#" ; else echo "# par_merge="; fi ) +$( if [ ! -z ${VIASH_PAR_DUPINFO+x} ]; then echo "${VIASH_PAR_DUPINFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dupinfo='&'#" ; else echo "# par_dupinfo="; fi ) +$( if [ ! -z ${VIASH_PAR_CLUSTER_ONLY+x} ]; then echo "${VIASH_PAR_CLUSTER_ONLY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cluster_only='&'#" ; else echo "# par_cluster_only="; fi ) +$( if [ ! -z ${VIASH_PAR_RM_REDUNDANT+x} ]; then echo "${VIASH_PAR_RM_REDUNDANT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rm_redundant='&'#" ; else echo "# par_rm_redundant="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_BOUNDARY+x} ]; then echo "${VIASH_PAR_NO_BOUNDARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_boundary='&'#" ; else echo "# par_no_boundary="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_OVERLAP+x} ]; then echo "${VIASH_PAR_NO_OVERLAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_overlap='&'#" ; else echo "# par_no_overlap="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +# unset flags +unset_if_false=( + par_coding + par_strict_range + par_no_single_exon + par_no_exon_attrs + par_nc + par_ignore_locus + par_description + par_sort_alpha + par_keep_genes + par_keep_attrs + par_keep_exon_attrs + par_keep_comments + par_process_other + par_rm_stop_codons + par_adj_cds_start + par_opposite_strand + par_coding_status + par_add_hasCDS + par_adj_stop + par_rm_noncanon + par_complete_cds + par_no_pseudo + par_in_bed + par_in_tlf + par_stream + par_merge + par_rm_redundant + par_no_boundary + par_no_overlap + par_force_exons + par_gene2exon + par_t_adopt + par_decode + par_merge_exons + par_junctions + par_w_nocds + par_tr_cds + par_w_coords + par_stop_dot + par_id_version + par_gtf_output + par_bed + par_tlf + par_expose_dups + par_cluster_only +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# if par_table is not empty, replace ";" with "," +par_table=\\$(echo "\\$par_table" | tr ';' ',') + +\\$(which gffread) \\\\ + "\\$par_input" \\\\ + \\${par_chr_mapping:+-m "\\$par_chr_mapping"} \\\\ + \\${par_seq_info:+-s "\\$par_seq_info"} \\\\ + -o "\\$par_outfile" \\\\ + \\${par_force_exons:+--force-exons} \\\\ + \\${par_gene2exon:+--gene2exon} \\\\ + \\${par_t_adopt:+--t-adopt} \\\\ + \\${par_decode:+-D} \\\\ + \\${par_merge_exons:+-Z} \\\\ + \\${par_genome:+-g "\\$par_genome"} \\\\ + \\${par_junctions:+-j} \\\\ + \\${par_spliced_exons:+-w "\\$par_spliced_exons"} \\\\ + \\${par_w_add:+--w-add "\\$par_w_add"} \\\\ + \\${par_w_nocds:+--w-nocds} \\\\ + \\${par_spliced_cds:+-x "\\$par_spliced_cds"} \\\\ + \\${par_tr_cds:+-y "\\$par_tr_cds"} \\\\ + \\${par_w_coords:+-W} \\\\ + \\${par_stop_dot:+-S} \\\\ + \\${par_id_version:+-L} \\\\ + \\${par_trackname:+-t "\\$par_trackname"} \\\\ + \\${par_gtf_output:+-T} \\\\ + \\${par_bed:+--bed} \\\\ + \\${par_tlf:+--tlf} \\\\ + \\${par_table:+--table "\\$par_table"} \\\\ + \\${par_expose_dups:+-E} \\\\ + \\${par_ids:+--ids "\\$par_ids"} \\\\ + \\${par_nids:+--nids "\\$par_nids"} \\\\ + \\${par_maxintron:+-i "\\$par_maxintron"} \\\\ + \\${par_minlen:+-l "\\$par_minlen"} \\\\ + \\${par_range:+-r "\\$par_range"} \\\\ + \\${par_strict_range:+-R} \\\\ + \\${par_jmatch:+--jmatch "\\$par_jmatch"} \\\\ + \\${par_no_single_exon:+-U} \\\\ + \\${par_coding:+-C} \\\\ + \\${par_nc:+--nc} \\\\ + \\${par_ignore_locus:+--ignore-locus} \\\\ + \\${par_description:+-A} \\\\ + \\${par_sort_alpha:+--sort-alpha} \\\\ + \\${par_sort_by:+--sort-by "\\$par_sort_by"} \\\\ + \\${par_keep_attrs:+-F} \\\\ + \\${par_keep_exon_attrs:+--keep-exon-attrs} \\\\ + \\${par_no_exon_attrs:+-G} \\\\ + \\${par_attrs:+--attrs "\\$par_attrs"} \\\\ + \\${par_keep_genes:+--keep-genes} \\\\ + \\${par_keep_comments:+--keep-comments} \\\\ + \\${par_process_other:+-O} \\\\ + \\${par_rm_stop_codons:+-V} \\\\ + \\${par_adj_cds_start:+-H} \\\\ + \\${par_opposite_strand:+-B} \\\\ + \\${par_coding_status:+-P} \\\\ + \\${par_add_hasCDS:+--add-hasCDS} \\\\ + \\${par_adj_stop:+--adj-stop} \\\\ + \\${par_rm_noncanon:+-N} \\\\ + \\${par_complete_cds:+-J} \\\\ + \\${par_no_pseudo:+--no-pseudo} \\\\ + \\${par_in_bed:+--in-bed} \\\\ + \\${par_in_tlf:+--in-tlf} \\\\ + \\${par_stream:+--stream} \\\\ + \\${par_merge:+-M} \\\\ + \\${par_dupinfo:+-d "\\$par_dupinfo"} \\\\ + \\${par_cluster_only:+--cluster-only} \\\\ + \\${par_rm_redundant:+-K} \\\\ + \\${par_no_boundary:+-Q} \\\\ + \\${par_no_overlap:+-Y} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/gffread", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow.config new file mode 100644 index 0000000..5992c9d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'gffread' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Validate, filter, convert and perform various other operations on GFF files.' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow_schema.json new file mode 100644 index 0000000..5ed6b22 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/nextflow_schema.json @@ -0,0 +1,816 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "gffread", +"description": "Validate, filter, convert and perform various other operations on GFF files.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `annotation.gff`. A reference file in either the GFF3, GFF2 or GTF format", + "help_text": "Type: `file`, required, example: `annotation.gff`. A reference file in either the GFF3, GFF2 or GTF format.\n" + + } + + + , + "chr_mapping": { + "type": + "string", + "description": "Type: `file`. \u003cchr_replace\u003e is a name mapping table for converting reference sequence names, \nhaving this 2-column format: \u003coriginal_ref_ID\u003e \u003cnew_ref_ID\u003e", + "help_text": "Type: `file`. \u003cchr_replace\u003e is a name mapping table for converting reference sequence names, \nhaving this 2-column format: \u003coriginal_ref_ID\u003e \u003cnew_ref_ID\u003e.\n" + + } + + + , + "seq_info": { + "type": + "string", + "description": "Type: `file`. \u003cseq_info", + "help_text": "Type: `file`. \u003cseq_info.fsize\u003e is a tab-delimited file providing this info for each of the mapped \nsequences: \u003cseq-name\u003e \u003cseq-length\u003e \u003cseq-description\u003e (useful for --description option with \nmRNA/EST/protein mappings).\n" + + } + + + , + "genome": { + "type": + "string", + "description": "Type: `file`, example: `genome.fa`. Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names)", + "help_text": "Type: `file`, example: `genome.fa`. Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names).\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "outfile": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.outfile.gff`, example: `output.gff`. Write the output records into \u003coutfile\u003e", + "help_text": "Type: `file`, required, default: `$id.$key.outfile.gff`, example: `output.gff`. Write the output records into \u003coutfile\u003e.\n" + , + "default":"$id.$key.outfile.gff" + } + + + , + "force_exons": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Make sure that the lowest level GFF features are considered \"exon\" features", + "help_text": "Type: `boolean_true`, default: `false`. Make sure that the lowest level GFF features are considered \"exon\" features.\n" + , + "default":false + } + + + , + "gene2exon": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript)", + "help_text": "Type: `boolean_true`, default: `false`. For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript).\n" + , + "default":false + } + + + , + "t_adopt": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent", + "help_text": "Type: `boolean_true`, default: `false`. Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent.\n" + , + "default":false + } + + + , + "decode": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Decode url encoded characters within attributes", + "help_text": "Type: `boolean_true`, default: `false`. Decode url encoded characters within attributes.\n" + , + "default":false + } + + + , + "merge_exons": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Merge very close exons into a single exon (when intron size\u003c4)", + "help_text": "Type: `boolean_true`, default: `false`. Merge very close exons into a single exon (when intron size\u003c4).\n" + , + "default":false + } + + + , + "junctions": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output the junctions and the corresponding transcripts", + "help_text": "Type: `boolean_true`, default: `false`. Output the junctions and the corresponding transcripts.\n" + , + "default":false + } + + + , + "spliced_exons": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.spliced_exons.fa`, example: `exons.fa`. Write a fasta file with spliced exons for each transcript", + "help_text": "Type: `file`, default: `$id.$key.spliced_exons.fa`, example: `exons.fa`. Write a fasta file with spliced exons for each transcript.\n" + , + "default":"$id.$key.spliced_exons.fa" + } + + + , + "w_add": { + "type": + "integer", + "description": "Type: `integer`. For the --spliced_exons option, extract additional \u003cN\u003e bases both upstream and \ndownstream of the transcript boundaries", + "help_text": "Type: `integer`. For the --spliced_exons option, extract additional \u003cN\u003e bases both upstream and \ndownstream of the transcript boundaries.\n" + + } + + + , + "w_nocds": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --spliced_exons, disable the output of CDS info in the FASTA file", + "help_text": "Type: `boolean_true`, default: `false`. For --spliced_exons, disable the output of CDS info in the FASTA file.\n" + , + "default":false + } + + + , + "spliced_cds": { + "type": + "string", + "description": "Type: `file`, example: `cds.fa`. Write a fasta file with spliced CDS for each GFF transcript", + "help_text": "Type: `file`, example: `cds.fa`. Write a fasta file with spliced CDS for each GFF transcript.\n" + + } + + + , + "tr_cds": { + "type": + "string", + "description": "Type: `file`, example: `tr_cds.fa`. Write a protein fasta file with the translation of CDS for each record", + "help_text": "Type: `file`, example: `tr_cds.fa`. Write a protein fasta file with the translation of CDS for each record.\n" + + } + + + , + "w_coords": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --spliced_exons, --spliced_cds and -tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence", + "help_text": "Type: `boolean_true`, default: `false`. For --spliced_exons, --spliced_cds and -tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n" + , + "default":false + } + + + , + "stop_dot": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --tr_cds option, use \u0027*\u0027 instead of \u0027", + "help_text": "Type: `boolean_true`, default: `false`. For --tr_cds option, use \u0027*\u0027 instead of \u0027.\u0027 as stop codon translation.\n" + , + "default":false + } + + + , + "id_version": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ensembl GTF to GFF3 conversion, adds version to IDs", + "help_text": "Type: `boolean_true`, default: `false`. Ensembl GTF to GFF3 conversion, adds version to IDs.\n" + , + "default":false + } + + + , + "trackname": { + "type": + "string", + "description": "Type: `string`. Use \u003ctrackname\u003e in the 2nd column of each GFF/GTF output line", + "help_text": "Type: `string`. Use \u003ctrackname\u003e in the 2nd column of each GFF/GTF output line.\n" + + } + + + , + "gtf_output": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Main output will be GTF instead of GFF3", + "help_text": "Type: `boolean_true`, default: `false`. Main output will be GTF instead of GFF3.\n" + , + "default":false + } + + + , + "bed": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output records in BED format instead of default GFF3", + "help_text": "Type: `boolean_true`, default: `false`. Output records in BED format instead of default GFF3.\n" + , + "default":false + } + + + , + "tlf": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output \"transcript line format\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n exoncount=N;exons=\u003cexons\u003e;CDSphase=\u003cN\u003e;CDS=\u003cCDScoords\u003e\n\u003cexons\u003e is a comma-delimited list of exon_start-exon_end coordinates;\n\u003cCDScoords\u003e is CDS_start:CDS_end coordinates or a list like \u003cexons\u003e", + "help_text": "Type: `boolean_true`, default: `false`. Output \"transcript line format\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n exoncount=N;exons=\u003cexons\u003e;CDSphase=\u003cN\u003e;CDS=\u003cCDScoords\u003e\n\u003cexons\u003e is a comma-delimited list of exon_start-exon_end coordinates;\n\u003cCDScoords\u003e is CDS_start:CDS_end coordinates or a list like \u003cexons\u003e.\n" + , + "default":false + } + + + , + "table": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in \u003cattrlist\u003e; special pseudo-attributes (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in \u003cattrlist\u003e; special pseudo-attributes (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords.\n" + + } + + + , + "expose_dups": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records", + "help_text": "Type: `boolean_true`, default: `false`. Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records.\n" + , + "default":false + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "ids": { + "type": + "string", + "description": "Type: `file`. Discard records/transcripts if their IDs are not listed in \u003cIDs", + "help_text": "Type: `file`. Discard records/transcripts if their IDs are not listed in \u003cIDs.lst\u003e.\n" + + } + + + , + "nids": { + "type": + "string", + "description": "Type: `file`. Discard records/transcripts if their IDs are listed in \u003cIDs", + "help_text": "Type: `file`. Discard records/transcripts if their IDs are listed in \u003cIDs.lst\u003e.\n" + + } + + + , + "maxintron": { + "type": + "integer", + "description": "Type: `integer`. Discard transcripts having an intron larger than \u003cmaxintron\u003e", + "help_text": "Type: `integer`. Discard transcripts having an intron larger than \u003cmaxintron\u003e.\n" + + } + + + , + "minlen": { + "type": + "integer", + "description": "Type: `integer`. Discard transcripts shorter than \u003cminlen\u003e bases", + "help_text": "Type: `integer`. Discard transcripts shorter than \u003cminlen\u003e bases.\n" + + } + + + , + "range": { + "type": + "string", + "description": "Type: `string`. Only show transcripts overlapping coordinate range \u003cstart\u003e", + "help_text": "Type: `string`. Only show transcripts overlapping coordinate range \u003cstart\u003e..\u003cend\u003e (on chromosome/contig \n\u003cchr\u003e, strand \u003cstrand\u003e if provided).\n" + + } + + + , + "strict_range": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --range option, discard all transcripts that are not fully contained within the given \nrange", + "help_text": "Type: `boolean_true`, default: `false`. For --range option, discard all transcripts that are not fully contained within the given \nrange.\n" + , + "default":false + } + + + , + "jmatch": { + "type": + "string", + "description": "Type: `string`. Only output transcripts matching the given junction", + "help_text": "Type: `string`. Only output transcripts matching the given junction.\n" + + } + + + , + "no_single_exon": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard single-exon transcripts", + "help_text": "Type: `boolean_true`, default: `false`. Discard single-exon transcripts.\n" + , + "default":false + } + + + , + "coding": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Coding only: discard mRNAs that have no CDS features", + "help_text": "Type: `boolean_true`, default: `false`. Coding only: discard mRNAs that have no CDS features.\n" + , + "default":false + } + + + , + "nc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Non-coding only: discard mRNAs that have CDS features", + "help_text": "Type: `boolean_true`, default: `false`. Non-coding only: discard mRNAs that have CDS features.\n" + , + "default":false + } + + + , + "ignore_locus": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard locus features and attributes found in the input", + "help_text": "Type: `boolean_true`, default: `false`. Discard locus features and attributes found in the input.\n" + , + "default":false + } + + + , + "description": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the description field from \u003cseq_info", + "help_text": "Type: `boolean_true`, default: `false`. Use the description field from \u003cseq_info.fsize\u003e and add it as the value for a \u0027descr\u0027 \nattribute to the GFF record.\n" + , + "default":false + } + + +} +}, + + + "sorting" : { + "title": "Sorting", + "type": "object", + "description": "No description", + "properties": { + + + "sort_alpha": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Chromosomes (reference sequences) are sorted alphabetically", + "help_text": "Type: `boolean_true`, default: `false`. Chromosomes (reference sequences) are sorted alphabetically.\n" + , + "default":false + } + + + , + "sort_by": { + "type": + "string", + "description": "Type: `file`. Sort the reference sequences by the order in which their names are given in the \n\u003crefseq", + "help_text": "Type: `file`. Sort the reference sequences by the order in which their names are given in the \n\u003crefseq.lst\u003e file.\n" + + } + + +} +}, + + + "misc options" : { + "title": "Misc options", + "type": "object", + "description": "No description", + "properties": { + + + "keep_attrs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Keep all GFF attributes (for non-exon features)", + "help_text": "Type: `boolean_true`, default: `false`. Keep all GFF attributes (for non-exon features).\n" + , + "default":false + } + + + , + "keep_exon_attrs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For -F option, do not attempt to reduce redundant exon/CDS attributes", + "help_text": "Type: `boolean_true`, default: `false`. For -F option, do not attempt to reduce redundant exon/CDS attributes.\n" + , + "default":false + } + + + , + "no_exon_attrs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not keep exon attributes, move them to the transcript feature (for GFF3 output)", + "help_text": "Type: `boolean_true`, default: `false`. Do not keep exon attributes, move them to the transcript feature (for GFF3 output).\n" + , + "default":false + } + + + , + "attrs": { + "type": + "string", + "description": "Type: `string`. Only output the GTF/GFF attributes listed in \u003cattr-list\u003e which is a comma delimited \nlist of attribute names to", + "help_text": "Type: `string`. Only output the GTF/GFF attributes listed in \u003cattr-list\u003e which is a comma delimited \nlist of attribute names to.\n" + + } + + + , + "keep_genes": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. In transcript-only mode (default), also preserve gene records", + "help_text": "Type: `boolean_true`, default: `false`. In transcript-only mode (default), also preserve gene records.\n" + , + "default":false + } + + + , + "keep_comments": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For GFF3 input/output, try to preserve comments", + "help_text": "Type: `boolean_true`, default: `false`. For GFF3 input/output, try to preserve comments.\n" + , + "default":false + } + + + , + "process_other": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. process other non-transcript GFF records (by default non-transcript records are ignored)", + "help_text": "Type: `boolean_true`, default: `false`. process other non-transcript GFF records (by default non-transcript records are ignored).\n" + , + "default":false + } + + + , + "rm_stop_codons": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard any mRNAs with CDS having in-frame stop codons (requires --genome)", + "help_text": "Type: `boolean_true`, default: `false`. Discard any mRNAs with CDS having in-frame stop codons (requires --genome).\n" + , + "default":false + } + + + , + "adj_cds_start": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon", + "help_text": "Type: `boolean_true`, default: `false`. For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon.\n" + , + "default":false + } + + + , + "opposite_strand": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For -V option, single-exon transcripts are also checked on the opposite strand (requires \n--genome)", + "help_text": "Type: `boolean_true`, default: `false`. For -V option, single-exon transcripts are also checked on the opposite strand (requires \n--genome). \n" + , + "default":false + } + + + , + "coding_status": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome)", + "help_text": "Type: `boolean_true`, default: `false`. Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome).\n" + , + "default":false + } + + + , + "add_hasCDS": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add a \"hasCDS\" attribute with value \"true\" for transcripts that have CDS features", + "help_text": "Type: `boolean_true`, default: `false`. Add a \"hasCDS\" attribute with value \"true\" for transcripts that have CDS features. \n" + , + "default":false + } + + + , + "adj_stop": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream", + "help_text": "Type: `boolean_true`, default: `false`. Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream.\n" + , + "default":false + } + + + , + "rm_noncanon": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i", + "help_text": "Type: `boolean_true`, default: `false`. Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n" + , + "default":false + } + + + , + "complete_cds": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i", + "help_text": "Type: `boolean_true`, default: `false`. Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a complete CDS).\n" + , + "default":false + } + + + , + "no_pseudo": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Filter out records matching the \u0027pseudo\u0027 keyword", + "help_text": "Type: `boolean_true`, default: `false`. Filter out records matching the \u0027pseudo\u0027 keyword.\n" + , + "default":false + } + + + , + "in_bed": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input should be parsed as BED format (automatic if the input filename ends with ", + "help_text": "Type: `boolean_true`, default: `false`. Input should be parsed as BED format (automatic if the input filename ends with .bed*).\n" + , + "default":false + } + + + , + "in_tlf": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option \nbelow); automatic if the input filename ends with ", + "help_text": "Type: `boolean_true`, default: `false`. Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option \nbelow); automatic if the input filename ends with .tlf).\n" + , + "default":false + } + + + , + "stream": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data)", + "help_text": "Type: `boolean_true`, default: `false`. Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data).\n" + , + "default":false + } + + +} +}, + + + "clustering" : { + "title": "Clustering", + "type": "object", + "description": "No description", + "properties": { + + + "merge": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Cluster the input transcripts into loci, discarding \"redundant\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries)", + "help_text": "Type: `boolean_true`, default: `false`. Cluster the input transcripts into loci, discarding \"redundant\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries).\n" + , + "default":false + } + + + , + "dupinfo": { + "type": + "string", + "description": "Type: `file`. For --merge option, write duplication info to file \u003cdupinfo\u003e", + "help_text": "Type: `file`. For --merge option, write duplication info to file \u003cdupinfo\u003e.\n" + + } + + + , + "cluster_only": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Same as --merge but without discarding any of the \"duplicate\" transcripts, only create \n\"locus\" features", + "help_text": "Type: `boolean_true`, default: `false`. Same as --merge but without discarding any of the \"duplicate\" transcripts, only create \n\"locus\" features.\n" + , + "default":false + } + + + , + "rm_redundant": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container)", + "help_text": "Type: `boolean_true`, default: `false`. For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container).\n" + , + "default":false + } + + + , + "no_boundary": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and \u003e=80%\noverlap for single-exon transcripts", + "help_text": "Type: `boolean_true`, default: `false`. For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and \u003e=80%\noverlap for single-exon transcripts.\n" + , + "default":false + } + + + , + "no_overlap": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redudant)", + "help_text": "Type: `boolean_true`, default: `false`. For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redudant).\n" + , + "default":false + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/sorting" + }, + + { + "$ref": "#/definitions/misc options" + }, + + { + "$ref": "#/definitions/clustering" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/.config.vsh.yaml new file mode 100644 index 0000000..cfc55dd --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/.config.vsh.yaml @@ -0,0 +1,259 @@ +name: "kallisto_index" +namespace: "kallisto" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Path to a FASTA-file containing the transcriptome sequences, either\ + \ in plain text or \ncompressed (.gz) format.\n" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--d_list" + description: "Path to a FASTA-file containing sequences to mask from quantification.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--index" + info: null + example: + - "Kallisto_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "integer" + name: "--kmer_size" + description: "Kmer length passed to indexing step of pseudoaligners (default:\ + \ '31').\n" + info: null + example: + - 31 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--make_unique" + description: "Replace repeated target names with unique names.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--aa" + description: "Generate index from a FASTA-file containing amino acid sequences.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--distiguish" + description: "Generate index where sequences are distinguished by the sequence\ + \ names.\n" + info: null + direction: "input" + - type: "integer" + name: "--min_size" + alternatives: + - "-m" + description: "Length of minimizers (default: automatically chosen).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--ec_max_size" + alternatives: + - "-e" + description: "Maximum number of targets in an equivalence class (default: no maximum).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--tmp" + alternatives: + - "-T" + description: "Path to a directory for temporary files.\n" + info: null + example: + - "tmp" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Build a Kallisto index for the transcriptome to use Kallisto in the\ + \ mapping-based mode.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "kallisto" +- "index" +license: "BSD 2-Clause License" +references: + doi: + - "https://doi.org/10.1038/nbt.3519" +links: + repository: "https://github.com/pachterlab/kallisto" + homepage: "https://pachterlab.github.io/kallisto/about" + documentation: "https://pachterlab.github.io/kallisto/manual" + issue_tracker: "https://github.com/pachterlab/kallisto/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y --no-install-recommends wget && \\\ + \nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\ + \ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\ + \ /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/kallisto/kallisto_index/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/kallisto/kallisto_index" + executable: "target/nextflow/kallisto/kallisto_index/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/main.nf new file mode 100644 index 0000000..0aa0dc4 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/main.nf @@ -0,0 +1,3910 @@ +// kallisto_index v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "kallisto_index", + "namespace" : "kallisto", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--d_list", + "description" : "Path to a FASTA-file containing sequences to mask from quantification.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--index", + "example" : [ + "Kallisto_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--kmer_size", + "description" : "Kmer length passed to indexing step of pseudoaligners (default: '31').\n", + "example" : [ + 31 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--make_unique", + "description" : "Replace repeated target names with unique names.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--aa", + "description" : "Generate index from a FASTA-file containing amino acid sequences.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--distiguish", + "description" : "Generate index where sequences are distinguished by the sequence names.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_size", + "alternatives" : [ + "-m" + ], + "description" : "Length of minimizers (default: automatically chosen).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--ec_max_size", + "alternatives" : [ + "-e" + ], + "description" : "Maximum number of targets in an equivalence class (default: no maximum).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--tmp", + "alternatives" : [ + "-T" + ], + "description" : "Path to a directory for temporary files.\n", + "example" : [ + "tmp" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "kallisto", + "index" + ], + "license" : "BSD 2-Clause License", + "references" : { + "doi" : [ + "https://doi.org/10.1038/nbt.3519" + ] + }, + "links" : { + "repository" : "https://github.com/pachterlab/kallisto", + "homepage" : "https://pachterlab.github.io/kallisto/about", + "documentation" : "https://pachterlab.github.io/kallisto/manual", + "issue_tracker" : "https://github.com/pachterlab/kallisto/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y --no-install-recommends wget && \\\\\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \\\\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\\\nmv kallisto/kallisto /usr/local/bin/\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/kallisto/kallisto_index/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/kallisto/kallisto_index", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_D_LIST+x} ]; then echo "${VIASH_PAR_D_LIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_d_list='&'#" ; else echo "# par_d_list="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_KMER_SIZE+x} ]; then echo "${VIASH_PAR_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmer_size='&'#" ; else echo "# par_kmer_size="; fi ) +$( if [ ! -z ${VIASH_PAR_MAKE_UNIQUE+x} ]; then echo "${VIASH_PAR_MAKE_UNIQUE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_make_unique='&'#" ; else echo "# par_make_unique="; fi ) +$( if [ ! -z ${VIASH_PAR_AA+x} ]; then echo "${VIASH_PAR_AA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aa='&'#" ; else echo "# par_aa="; fi ) +$( if [ ! -z ${VIASH_PAR_DISTIGUISH+x} ]; then echo "${VIASH_PAR_DISTIGUISH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_distiguish='&'#" ; else echo "# par_distiguish="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_SIZE+x} ]; then echo "${VIASH_PAR_MIN_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_size='&'#" ; else echo "# par_min_size="; fi ) +$( if [ ! -z ${VIASH_PAR_EC_MAX_SIZE+x} ]; then echo "${VIASH_PAR_EC_MAX_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ec_max_size='&'#" ; else echo "# par_ec_max_size="; fi ) +$( if [ ! -z ${VIASH_PAR_TMP+x} ]; then echo "${VIASH_PAR_TMP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tmp='&'#" ; else echo "# par_tmp="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + +unset_if_false=( par_make_unique par_aa par_distinguish ) + +for var in "\\${unset_if_false[@]}"; do + temp_var="\\${!var}" + [[ "\\$temp_var" == "false" ]] && unset \\$var +done + +if [ -n "\\$par_kmer_size" ]; then + if [[ "\\$par_kmer_size" -lt 1 || "\\$par_kmer_size" -gt 31 || \\$(( par_kmer_size % 2 )) -eq 0 ]]; then + echo "Error: Kmer size must be an odd number between 1 and 31." + exit 1 + fi +fi + +kallisto index \\\\ + -i "\\${par_index}" \\\\ + \\${par_kmer_size:+--kmer-size "\\${par_kmer_size}"} \\\\ + \\${par_make_unique:+--make-unique} \\\\ + \\${par_aa:+--aa} \\\\ + \\${par_distinguish:+--distinguish} \\\\ + \\${par_min_size:+--min-size "\\${par_min_size}"} \\\\ + \\${par_ec_max_size:+--ec-max-size "\\${par_ec_max_size}"} \\\\ + \\${par_d_list:+--d-list "\\${par_d_list}"} \\\\ + \\${meta_cpus:+--threads "\\${meta_cpus}"} \\\\ + \\${par_tmp:+--tmp "\\${par_tmp}"} \\\\ + "\\${par_input}" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/kallisto/kallisto_index", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow.config new file mode 100644 index 0000000..c29aae2 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'kallisto/kallisto_index' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow_schema.json new file mode 100644 index 0000000..44e9e57 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/nextflow_schema.json @@ -0,0 +1,192 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "kallisto_index", +"description": "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (", + "help_text": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n" + + } + + + , + "d_list": { + "type": + "string", + "description": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification", + "help_text": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification.\n" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.index`, example: `Kallisto_index`. ", + "help_text": "Type: `file`, default: `$id.$key.index`, example: `Kallisto_index`. " + , + "default":"$id.$key.index" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "kmer_size": { + "type": + "integer", + "description": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027)", + "help_text": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027).\n" + + } + + + , + "make_unique": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names", + "help_text": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names.\n" + , + "default":false + } + + + , + "aa": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences", + "help_text": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences.\n" + , + "default":false + } + + + , + "distiguish": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names", + "help_text": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names.\n" + , + "default":false + } + + + , + "min_size": { + "type": + "integer", + "description": "Type: `integer`. Length of minimizers (default: automatically chosen)", + "help_text": "Type: `integer`. Length of minimizers (default: automatically chosen).\n" + + } + + + , + "ec_max_size": { + "type": + "integer", + "description": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum)", + "help_text": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum).\n" + + } + + + , + "tmp": { + "type": + "string", + "description": "Type: `string`, example: `tmp`. Path to a directory for temporary files", + "help_text": "Type: `string`, example: `tmp`. Path to a directory for temporary files.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/.config.vsh.yaml new file mode 100644 index 0000000..d76c8ba --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/.config.vsh.yaml @@ -0,0 +1,287 @@ +name: "kallisto_quant" +namespace: "kallisto" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "List of input FastQ files of size 1 and 2 for single-end and paired-end\ + \ data, respectively." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--index" + alternatives: + - "-i" + description: "Kallisto genome index." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_dir" + alternatives: + - "-o" + description: "Directory to write output to." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--log" + description: "File containing log information from running kallisto quant" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "boolean_true" + name: "--single" + description: "Single end mode." + info: null + direction: "input" + - type: "boolean_true" + name: "--single_overhang" + description: "Include reads where unobserved rest of fragment is predicted to\ + \ lie outside a transcript." + info: null + direction: "input" + - type: "boolean_true" + name: "--fr_stranded" + description: "Strand specific reads, first read forward." + info: null + direction: "input" + - type: "boolean_true" + name: "--rf_stranded" + description: "Strand specific reads, first read reverse." + info: null + direction: "input" + - type: "double" + name: "--fragment_length" + alternatives: + - "-l" + description: "The estimated average fragment length." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--sd" + alternatives: + - "-s" + description: "The estimated standard deviation of the fragment length (default:\ + \ -l, -s values are estimated \nfrom paired end data, but are required when\ + \ using --single).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--plaintext" + description: "Output plaintext instead of HDF5." + info: null + direction: "input" + - type: "integer" + name: "--bootstrap_samples" + alternatives: + - "-b" + description: "Number of bootstrap samples to draw. Default: '0'\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed" + description: "Random seed for bootstrap. Default: '42'\n" + info: null + example: + - 42 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Quantifying abundances of transcripts from RNA-Seq data, or more generally\ + \ of target sequences using high-throughput sequencing reads.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "kallisto" +- "quant" +- "pseudoalignment" +license: "BSD 2-Clause License" +references: + doi: + - "10.1038/nbt.3519" +links: + repository: "https://github.com/pachterlab/kallisto" + homepage: "https://pachterlab.github.io/kallisto/about" + documentation: "https://pachterlab.github.io/kallisto/manual" + issue_tracker: "https://github.com/pachterlab/kallisto/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y --no-install-recommends wget && \\\ + \nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\ + \ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\ + \ /usr/local/bin/\n" + - type: "docker" + run: + - "echo \"kallisto: $(kallisto version | sed 's/kallisto, version //')\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/kallisto/kallisto_quant/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/kallisto/kallisto_quant" + executable: "target/nextflow/kallisto/kallisto_quant/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/main.nf new file mode 100644 index 0000000..fc805ab --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/main.nf @@ -0,0 +1,3958 @@ +// kallisto_quant v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "kallisto_quant", + "namespace" : "kallisto", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--index", + "alternatives" : [ + "-i" + ], + "description" : "Kallisto genome index.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_dir", + "alternatives" : [ + "-o" + ], + "description" : "Directory to write output to.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--log", + "description" : "File containing log information from running kallisto quant", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--single", + "description" : "Single end mode.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--single_overhang", + "description" : "Include reads where unobserved rest of fragment is predicted to lie outside a transcript.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fr_stranded", + "description" : "Strand specific reads, first read forward.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--rf_stranded", + "description" : "Strand specific reads, first read reverse.", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--fragment_length", + "alternatives" : [ + "-l" + ], + "description" : "The estimated average fragment length.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--sd", + "alternatives" : [ + "-s" + ], + "description" : "The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--plaintext", + "description" : "Output plaintext instead of HDF5.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--bootstrap_samples", + "alternatives" : [ + "-b" + ], + "description" : "Number of bootstrap samples to draw. Default: '0'\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed", + "description" : "Random seed for bootstrap. Default: '42'\n", + "example" : [ + 42 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "kallisto", + "quant", + "pseudoalignment" + ], + "license" : "BSD 2-Clause License", + "references" : { + "doi" : [ + "10.1038/nbt.3519" + ] + }, + "links" : { + "repository" : "https://github.com/pachterlab/kallisto", + "homepage" : "https://pachterlab.github.io/kallisto/about", + "documentation" : "https://pachterlab.github.io/kallisto/manual", + "issue_tracker" : "https://github.com/pachterlab/kallisto/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y --no-install-recommends wget && \\\\\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \\\\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\\\nmv kallisto/kallisto /usr/local/bin/\n" + ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"kallisto: $(kallisto version | sed 's/kallisto, version //')\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/kallisto/kallisto_quant/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/kallisto/kallisto_quant", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_SINGLE+x} ]; then echo "${VIASH_PAR_SINGLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single='&'#" ; else echo "# par_single="; fi ) +$( if [ ! -z ${VIASH_PAR_SINGLE_OVERHANG+x} ]; then echo "${VIASH_PAR_SINGLE_OVERHANG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single_overhang='&'#" ; else echo "# par_single_overhang="; fi ) +$( if [ ! -z ${VIASH_PAR_FR_STRANDED+x} ]; then echo "${VIASH_PAR_FR_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fr_stranded='&'#" ; else echo "# par_fr_stranded="; fi ) +$( if [ ! -z ${VIASH_PAR_RF_STRANDED+x} ]; then echo "${VIASH_PAR_RF_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rf_stranded='&'#" ; else echo "# par_rf_stranded="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi ) +$( if [ ! -z ${VIASH_PAR_SD+x} ]; then echo "${VIASH_PAR_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sd='&'#" ; else echo "# par_sd="; fi ) +$( if [ ! -z ${VIASH_PAR_PLAINTEXT+x} ]; then echo "${VIASH_PAR_PLAINTEXT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_plaintext='&'#" ; else echo "# par_plaintext="; fi ) +$( if [ ! -z ${VIASH_PAR_BOOTSTRAP_SAMPLES+x} ]; then echo "${VIASH_PAR_BOOTSTRAP_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bootstrap_samples='&'#" ; else echo "# par_bootstrap_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + +unset_if_false=( par_single par_single_overhang par_rf_stranded par_fr_stranded par_plaintext ) + +for var in "\\${unset_if_false[@]}"; do + temp_var="\\${!var}" + [[ "\\$temp_var" == "false" ]] && unset \\$var +done + +IFS=";" read -ra input <<< \\$par_input + +# Check if par_single is not set and ensure even number of input files +if [ -z "\\$par_single" ]; then + if [ \\$((\\${#input[@]} % 2)) -ne 0 ]; then + echo "Error: When running in paired-end mode, the number of input files must be even." + echo "Number of input files provided: \\${#input[@]}" + exit 1 + fi +fi + + +mkdir -p \\$par_output_dir + + +kallisto quant \\\\ + \\${meta_cpus:+--threads \\$meta_cpus} \\\\ + -i \\$par_index \\\\ + \\${par_gtf:+--gtf "\\${par_gtf}"} \\\\ + \\${par_single:+--single} \\\\ + \\${par_single_overhang:+--single-overhang} \\\\ + \\${par_fr_stranded:+--fr-stranded} \\\\ + \\${par_rf_stranded:+--rf-stranded} \\\\ + \\${par_plaintext:+--plaintext} \\\\ + \\${par_bootstrap_samples:+--bootstrap-samples "\\${par_bootstrap_samples}"} \\\\ + \\${par_fragment_length:+--fragment-length "\\${par_fragment_length}"} \\\\ + \\${par_sd:+--sd "\\${par_sd}"} \\\\ + \\${par_seed:+--seed "\\${par_seed}"} \\\\ + -o \\$par_output_dir \\\\ + \\${input[*]} 2> >(tee -a \\$par_log >&2) +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/kallisto/kallisto_quant", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow.config new file mode 100644 index 0000000..50167f8 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'kallisto/kallisto_quant' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow_schema.json new file mode 100644 index 0000000..384e153 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/nextflow_schema.json @@ -0,0 +1,225 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "kallisto_quant", +"description": "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively", + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively." + + } + + + , + "index": { + "type": + "string", + "description": "Type: `file`, required. Kallisto genome index", + "help_text": "Type: `file`, required. Kallisto genome index." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_dir": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_dir`. Directory to write output to", + "help_text": "Type: `file`, required, default: `$id.$key.output_dir`. Directory to write output to." + , + "default":"$id.$key.output_dir" + } + + + , + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log`. File containing log information from running kallisto quant", + "help_text": "Type: `file`, default: `$id.$key.log`. File containing log information from running kallisto quant" + , + "default":"$id.$key.log" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "single": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Single end mode", + "help_text": "Type: `boolean_true`, default: `false`. Single end mode." + , + "default":false + } + + + , + "single_overhang": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript", + "help_text": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript." + , + "default":false + } + + + , + "fr_stranded": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward", + "help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward." + , + "default":false + } + + + , + "rf_stranded": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse", + "help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse." + , + "default":false + } + + + , + "fragment_length": { + "type": + "number", + "description": "Type: `double`. The estimated average fragment length", + "help_text": "Type: `double`. The estimated average fragment length." + + } + + + , + "sd": { + "type": + "number", + "description": "Type: `double`. The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single)", + "help_text": "Type: `double`. The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n" + + } + + + , + "plaintext": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5", + "help_text": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5." + , + "default":false + } + + + , + "bootstrap_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Number of bootstrap samples to draw", + "help_text": "Type: `integer`, example: `0`. Number of bootstrap samples to draw. Default: \u00270\u0027\n" + + } + + + , + "seed": { + "type": + "integer", + "description": "Type: `integer`, example: `42`. Random seed for bootstrap", + "help_text": "Type: `integer`, example: `42`. Random seed for bootstrap. Default: \u002742\u0027\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/.config.vsh.yaml new file mode 100644 index 0000000..a191eda --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/.config.vsh.yaml @@ -0,0 +1,497 @@ +name: "multiqc" +version: "v0.3.1" +authors: +- name: "Dorien Roosen" + roles: + - "author" + - "maintainer" + info: + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "File paths to be searched for analysis results to be included in\ + \ the report.\n" + info: null + example: + - "data/results" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output_report" + description: "Filepath of the generated report.\n" + info: null + example: + - "multiqc_report.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_data" + description: "Output directory for parsed data files. If not provided, parsed\ + \ data will not be published.\n" + info: null + example: + - "multiqc_data" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plots" + description: "Output directory for generated plots. If not provided, plots will\ + \ not be published.\n" + info: null + example: + - "multiqc_plots" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Modules and analyses to run" + arguments: + - type: "string" + name: "--include_modules" + description: "Use only these module" + info: null + example: + - "fastqc" + - "cutadapt" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--exclude_modules" + description: "Do not use only these modules" + info: null + example: + - "fastqc" + - "cutadapt" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--ignore_analysis" + info: null + example: + - "run_one/*" + - "run_two/*" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--ignore_samples" + info: null + example: + - "sample_1*" + - "sample_3*" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--ignore_symlinks" + description: "Ignore symlinked directories and files" + info: null + direction: "input" +- name: "Sample name handling" + arguments: + - type: "boolean_true" + name: "--dirs" + description: "Prepend directory to sample names to avoid clashing filenames" + info: null + direction: "input" + - type: "integer" + name: "--dirs_depth" + description: "Prepend n directories to sample names. Negative number to take from\ + \ start of path." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--full_names" + description: "Do not clean the sample names (leave as full file name)" + info: null + direction: "input" + - type: "boolean_true" + name: "--fn_as_s_name" + description: "Use the log filename as the sample name" + info: null + direction: "input" + - type: "file" + name: "--replace_names" + description: "TSV file to rename sample names during report generation" + info: null + example: + - "replace_names.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Report Customisation" + arguments: + - type: "string" + name: "--title" + description: "Report title. Printed as page header, used for filename if not otherwise\ + \ specified.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--comment" + description: "Custom comment, will be printed at the top of the report.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--template" + description: "Report template to use.\n" + info: null + required: false + choices: + - "default" + - "gathered" + - "geo" + - "highcharts" + - "sections" + - "simple" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_names" + description: "TSV file containing alternative sample names for renaming buttons\ + \ in the report.\n" + info: null + example: + - "sample_names.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_filters" + description: "TSV file containing show/hide patterns for the report\n" + info: null + example: + - "sample_filters.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--custom_css_file" + description: "Custom CSS file to add to the final report\n" + info: null + example: + - "custom_style_sheet.css" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--profile_runtime" + description: "Add analysis of how long MultiQC takes to run to the report\n" + info: null + direction: "input" +- name: "MultiQC behaviour" + arguments: + - type: "boolean_true" + name: "--verbose" + description: "Increase output verbosity.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--quiet" + description: "Only show log warnings\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--strict" + description: "Don't catch exceptions, run additional code checks to help development.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--development" + description: "Development mode. Do not compress and minimise JS, export uncompressed\ + \ plot data.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--require_logs" + description: "Require all explicitly requested modules to have log files. If not,\ + \ MultiQC will exit with an error.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_megaqc_upload" + description: "Don't upload generated report to MegaQC, even if MegaQC options\ + \ are found.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_ansi" + description: "Disable coloured log output.\n" + info: null + direction: "input" + - type: "string" + name: "--cl_config" + description: "YAML formatted string that allows to customize MultiQC behaviour\ + \ like input file detection.\n" + info: null + example: + - "qualimap_config: { general_stats_coverage: [20,40,200] }" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output format" + arguments: + - type: "boolean_true" + name: "--flat" + description: "Use only flat plots (static images).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--interactive" + description: "Use only interactive plots (in-browser Javascript).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--data_dir" + description: "Force the parsed data directory to be created.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_data_dir" + description: "Prevent the parsed data directory from being created.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--zip_data_dir" + description: "Compress the data directory.\n" + info: null + direction: "input" + - type: "string" + name: "--data_format" + description: "Output parsed data in a different format than the default 'txt'.\n" + info: null + required: false + choices: + - "tsv" + - "csv" + - "json" + - "yaml" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--pdf" + description: "Creates PDF report with the 'simple' template. Requires Pandoc to\ + \ be installed.\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "MultiQC aggregates results from bioinformatics analyses across many\ + \ samples into a single report.\nIt searches a given directory for analysis logs\ + \ and compiles a HTML report. It's a general use tool, perfect for summarising the\ + \ output from numerous bioinformatics tools.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: + keywords: + - "QC" + - "html report" + - "aggregate analysis" + links: + homepage: "https://multiqc.info/" + documentation: "https://multiqc.info/docs/" + repository: "https://github.com/MultiQC/MultiQC" + references: + doi: "10.1093/bioinformatics/btw354" + licence: "GPL v3 or later" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/biobox" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "multiqc --version | sed 's/multiqc, version\\s\\(.*\\)/multiqc: \"\\1\"/' >\ + \ /var/software_versions.txt\n" + test_setup: + - type: "apt" + packages: + - "jq" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/multiqc/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/multiqc" + executable: "target/nextflow/multiqc/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf new file mode 100644 index 0000000..b1e672a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf @@ -0,0 +1,4336 @@ +// multiqc v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dorien Roosen (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "multiqc", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Dorien Roosen", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "dorien@data-intuitive.com", + "github" : "dorien-er", + "linkedin" : "dorien-roosen" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "File paths to be searched for analysis results to be included in the report.\n", + "example" : [ + "data/results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Ouput", + "arguments" : [ + { + "type" : "file", + "name" : "--output_report", + "description" : "Filepath of the generated report.\n", + "example" : [ + "multiqc_report.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_data", + "description" : "Output directory for parsed data files. If not provided, parsed data will not be published.\n", + "example" : [ + "multiqc_data" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_plots", + "description" : "Output directory for generated plots. If not provided, plots will not be published.\n", + "example" : [ + "multiqc_plots" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Modules and analyses to run", + "arguments" : [ + { + "type" : "string", + "name" : "--include_modules", + "description" : "Use only these module", + "example" : [ + "fastqc", + "cutadapt" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--exclude_modules", + "description" : "Do not use only these modules", + "example" : [ + "fastqc", + "cutadapt" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--ignore_analysis", + "example" : [ + "run_one/*", + "run_two/*" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--ignore_samples", + "example" : [ + "sample_1*", + "sample_3*" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--ignore_symlinks", + "description" : "Ignore symlinked directories and files", + "direction" : "input" + } + ] + }, + { + "name" : "Sample name handling", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--dirs", + "description" : "Prepend directory to sample names to avoid clashing filenames", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--dirs_depth", + "description" : "Prepend n directories to sample names. Negative number to take from start of path.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--full_names", + "description" : "Do not clean the sample names (leave as full file name)", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fn_as_s_name", + "description" : "Use the log filename as the sample name", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--replace_names", + "description" : "TSV file to rename sample names during report generation", + "example" : [ + "replace_names.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Report Customisation", + "arguments" : [ + { + "type" : "string", + "name" : "--title", + "description" : "Report title. Printed as page header, used for filename if not otherwise specified.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--comment", + "description" : "Custom comment, will be printed at the top of the report.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--template", + "description" : "Report template to use.\n", + "required" : false, + "choices" : [ + "default", + "gathered", + "geo", + "highcharts", + "sections", + "simple" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_names", + "description" : "TSV file containing alternative sample names for renaming buttons in the report.\n", + "example" : [ + "sample_names.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_filters", + "description" : "TSV file containing show/hide patterns for the report\n", + "example" : [ + "sample_filters.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--custom_css_file", + "description" : "Custom CSS file to add to the final report\n", + "example" : [ + "custom_style_sheet.css" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--profile_runtime", + "description" : "Add analysis of how long MultiQC takes to run to the report\n", + "direction" : "input" + } + ] + }, + { + "name" : "MultiQC behaviour", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--verbose", + "description" : "Increase output verbosity.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "description" : "Only show log warnings\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--strict", + "description" : "Don't catch exceptions, run additional code checks to help development.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--development", + "description" : "Development mode. Do not compress and minimise JS, export uncompressed plot data.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--require_logs", + "description" : "Require all explicitly requested modules to have log files. If not, MultiQC will exit with an error.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_megaqc_upload", + "description" : "Don't upload generated report to MegaQC, even if MegaQC options are found.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_ansi", + "description" : "Disable coloured log output.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--cl_config", + "description" : "YAML formatted string that allows to customize MultiQC behaviour like input file detection.\n", + "example" : [ + "qualimap_config: { general_stats_coverage: [20,40,200] }" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output format", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--flat", + "description" : "Use only flat plots (static images).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--interactive", + "description" : "Use only interactive plots (in-browser Javascript).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--data_dir", + "description" : "Force the parsed data directory to be created.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_data_dir", + "description" : "Prevent the parsed data directory from being created.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--zip_data_dir", + "description" : "Compress the data directory.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--data_format", + "description" : "Output parsed data in a different format than the default 'txt'.\n", + "required" : false, + "choices" : [ + "tsv", + "csv", + "json", + "yaml" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--pdf", + "description" : "Creates PDF report with the 'simple' template. Requires Pandoc to be installed.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "info" : { + "keywords" : [ + "QC", + "html report", + "aggregate analysis" + ], + "links" : { + "homepage" : "https://multiqc.info/", + "documentation" : "https://multiqc.info/docs/", + "repository" : "https://github.com/MultiQC/MultiQC" + }, + "references" : { + "doi" : "10.1093/bioinformatics/btw354" + }, + "licence" : "GPL v3 or later" + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/biobox" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "multiqc --version | sed 's/multiqc, version\\\\s\\\\(.*\\\\)/multiqc: \\"\\\\1\\"/' > /var/software_versions.txt\n" + ] + } + ], + "test_setup" : [ + { + "type" : "apt", + "packages" : [ + "jq" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/multiqc/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/multiqc", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_REPORT+x} ]; then echo "${VIASH_PAR_OUTPUT_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_report='&'#" ; else echo "# par_output_report="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DATA+x} ]; then echo "${VIASH_PAR_OUTPUT_DATA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_data='&'#" ; else echo "# par_output_data="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOTS+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plots='&'#" ; else echo "# par_output_plots="; fi ) +$( if [ ! -z ${VIASH_PAR_INCLUDE_MODULES+x} ]; then echo "${VIASH_PAR_INCLUDE_MODULES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_include_modules='&'#" ; else echo "# par_include_modules="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE_MODULES+x} ]; then echo "${VIASH_PAR_EXCLUDE_MODULES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude_modules='&'#" ; else echo "# par_exclude_modules="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_ANALYSIS+x} ]; then echo "${VIASH_PAR_IGNORE_ANALYSIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_analysis='&'#" ; else echo "# par_ignore_analysis="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_SAMPLES+x} ]; then echo "${VIASH_PAR_IGNORE_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_samples='&'#" ; else echo "# par_ignore_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_SYMLINKS+x} ]; then echo "${VIASH_PAR_IGNORE_SYMLINKS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_symlinks='&'#" ; else echo "# par_ignore_symlinks="; fi ) +$( if [ ! -z ${VIASH_PAR_DIRS+x} ]; then echo "${VIASH_PAR_DIRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dirs='&'#" ; else echo "# par_dirs="; fi ) +$( if [ ! -z ${VIASH_PAR_DIRS_DEPTH+x} ]; then echo "${VIASH_PAR_DIRS_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dirs_depth='&'#" ; else echo "# par_dirs_depth="; fi ) +$( if [ ! -z ${VIASH_PAR_FULL_NAMES+x} ]; then echo "${VIASH_PAR_FULL_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_full_names='&'#" ; else echo "# par_full_names="; fi ) +$( if [ ! -z ${VIASH_PAR_FN_AS_S_NAME+x} ]; then echo "${VIASH_PAR_FN_AS_S_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fn_as_s_name='&'#" ; else echo "# par_fn_as_s_name="; fi ) +$( if [ ! -z ${VIASH_PAR_REPLACE_NAMES+x} ]; then echo "${VIASH_PAR_REPLACE_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_replace_names='&'#" ; else echo "# par_replace_names="; fi ) +$( if [ ! -z ${VIASH_PAR_TITLE+x} ]; then echo "${VIASH_PAR_TITLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_title='&'#" ; else echo "# par_title="; fi ) +$( if [ ! -z ${VIASH_PAR_COMMENT+x} ]; then echo "${VIASH_PAR_COMMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_comment='&'#" ; else echo "# par_comment="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMPLATE+x} ]; then echo "${VIASH_PAR_TEMPLATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_template='&'#" ; else echo "# par_template="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_NAMES+x} ]; then echo "${VIASH_PAR_SAMPLE_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_names='&'#" ; else echo "# par_sample_names="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_FILTERS+x} ]; then echo "${VIASH_PAR_SAMPLE_FILTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_filters='&'#" ; else echo "# par_sample_filters="; fi ) +$( if [ ! -z ${VIASH_PAR_CUSTOM_CSS_FILE+x} ]; then echo "${VIASH_PAR_CUSTOM_CSS_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_custom_css_file='&'#" ; else echo "# par_custom_css_file="; fi ) +$( if [ ! -z ${VIASH_PAR_PROFILE_RUNTIME+x} ]; then echo "${VIASH_PAR_PROFILE_RUNTIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_profile_runtime='&'#" ; else echo "# par_profile_runtime="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_STRICT+x} ]; then echo "${VIASH_PAR_STRICT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strict='&'#" ; else echo "# par_strict="; fi ) +$( if [ ! -z ${VIASH_PAR_DEVELOPMENT+x} ]; then echo "${VIASH_PAR_DEVELOPMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_development='&'#" ; else echo "# par_development="; fi ) +$( if [ ! -z ${VIASH_PAR_REQUIRE_LOGS+x} ]; then echo "${VIASH_PAR_REQUIRE_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_require_logs='&'#" ; else echo "# par_require_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_MEGAQC_UPLOAD+x} ]; then echo "${VIASH_PAR_NO_MEGAQC_UPLOAD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_megaqc_upload='&'#" ; else echo "# par_no_megaqc_upload="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_ANSI+x} ]; then echo "${VIASH_PAR_NO_ANSI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_ansi='&'#" ; else echo "# par_no_ansi="; fi ) +$( if [ ! -z ${VIASH_PAR_CL_CONFIG+x} ]; then echo "${VIASH_PAR_CL_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cl_config='&'#" ; else echo "# par_cl_config="; fi ) +$( if [ ! -z ${VIASH_PAR_FLAT+x} ]; then echo "${VIASH_PAR_FLAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_flat='&'#" ; else echo "# par_flat="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERACTIVE+x} ]; then echo "${VIASH_PAR_INTERACTIVE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interactive='&'#" ; else echo "# par_interactive="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_DIR+x} ]; then echo "${VIASH_PAR_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_dir='&'#" ; else echo "# par_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_DATA_DIR+x} ]; then echo "${VIASH_PAR_NO_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_data_dir='&'#" ; else echo "# par_no_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_ZIP_DATA_DIR+x} ]; then echo "${VIASH_PAR_ZIP_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip_data_dir='&'#" ; else echo "# par_zip_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_FORMAT+x} ]; then echo "${VIASH_PAR_DATA_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_format='&'#" ; else echo "# par_data_format="; fi ) +$( if [ ! -z ${VIASH_PAR_PDF+x} ]; then echo "${VIASH_PAR_PDF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pdf='&'#" ; else echo "# par_pdf="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +# disable flags +unset_if_false=( + par_ignore_symlinks + par_dirs + par_full_names + par_fn_as_s_name + par_profile_runtime + par_verbose + par_quiet + par_strict + par_development + par_require_logs + par_no_megaqc_upload + par_no_ansi + par_flat + par_interactive + par_static_plot_export + par_data_dir + par_no_data_dir + par_zip_data_dir + par_pdf +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# handle inputs +out_dir=\\$(dirname "\\$par_output_report") +output_report_file=\\$(basename "\\$par_output_report") +report_name="\\${output_report_file%.*}" + +# handle outputs +[[ -z "\\$par_output_report" ]] && no_report=true +[[ -z "\\$par_output_data" ]] && no_data_dir=true +[[ ! -z "\\$par_output_data" ]] && data_dir=true +[[ ! -z "\\$par_output_plots" ]] && export=true + +# handle multiples +IFS=";" read -ra inputs <<< \\$par_input + +if [[ -n "\\$par_include_modules" ]]; then + include_modules="" + IFS=";" read -ra incl_modules <<< \\$par_include_modules + for i in "\\${incl_modules[@]}"; do + include_modules+="--include \\$i " + done + unset IFS +fi + +if [[ -n "\\$par_exclude_modules" ]]; then + exclude_modules="" + IFS=";" read -ra excl_modules <<< \\$par_exclude_modules + for i in "\\${excl_modules[@]}"; do + exclude_modules+="--exclude \\$i" + done + unset IFS +fi + +if [[ -n "\\$par_ignore_analysis" ]]; then + ignore="" + IFS=";" read -ra ignore_analysis <<< \\$par_ignore_analysis + for i in "\\${ignore_analysis[@]}"; do + ignore+="--ignore \\$i " + done + unset IFS +fi + +if [[ -n "\\$par_ignore_samples" ]]; then + ignore_samples="" + IFS=";" read -ra ign_samples <<< \\$par_ignore_samples + for i in "\\${ign_samples[@]}"; do + ignore_samples+="--ignore-samples \\$i" + done + unset IFS +fi + +# run multiqc +multiqc \\\\ + \\${par_output_report:+--filename "\\$report_name"} \\\\ + \\${out_dir:+--outdir "\\$out_dir"} \\\\ + \\${no_report:+--no-report} \\\\ + \\${no_data_dir:+--no-data-dir} \\\\ + \\${data_dir:+--data-dir} \\\\ + \\${export:+--export} \\\\ + \\${par_title:+--title "\\$par_title"} \\\\ + \\${par_comment:+--comment "\\$par_comment"} \\\\ + \\${par_template:+--template "\\$par_template"} \\\\ + \\${par_sample_names:+--sample-names "\\$par_sample_names"} \\\\ + \\${par_sample_filters:+--sample-filters "\\$par_sample_filters"} \\\\ + \\${par_custom_css_file:+--custom-css-file "\\$par_custom_css_file"} \\\\ + \\${par_profile_runtime:+--profile-runtime} \\\\ + \\${par_dirs:+--dirs} \\\\ + \\${par_dirs_depth:+--dirs-depth "\\$par_dirs_depth"} \\\\ + \\${par_full_names:+--full-names} \\\\ + \\${par_fn_as_s_name:+--fn-as-s-name} \\\\ + \\${par_ignore_names:+--ignore-names "\\$par_ignore_names"} \\\\ + \\${par_ignore_symlinks:+--ignore-symlinks} \\\\ + \\${ignore_samples} \\\\ + \\${ignore} \\\\ + \\${exclude_modules} \\\\ + \\${include_modules} \\\\ + \\${par_include_modules:+--include-modules "\\$par_include_modules"} \\\\ + \\${par_data_format:+--data-format "\\$par_data_format"} \\\\ + \\${par_cl_config:+--cl-config "\\$par_cl_config"} \\\\ + \\${par_zip_data_dir:+--zip-data-dir} \\\\ + \\${par_pdf:+--pdf} \\\\ + \\${par_interactive:+--interactive} \\\\ + \\${par_flat:+--flat} \\\\ + \\${par_verbose:+--verbose} \\\\ + \\${par_quiet:+--quiet} \\\\ + \\${par_strict:+--strict} \\\\ + \\${par_no_megaqc_upload:+--no-megaqc-upload} \\\\ + \\${par_no_ansi:+--no-ansi} \\\\ + \\${par_profile_runtime:+--profile-runtime} \\\\ + \\${par_require_logs:+--require-logs} \\\\ + \\${par_development:+--development} \\\\ + --force \\\\ + "\\${inputs[@]}" + +# Move outputs + +if [[ -n "\\$par_output_data" ]] && [[ -d "\\${out_dir}/\\${report_name}_data" ]]; then + mv "\\${out_dir}/\\${report_name}_data" "\\$par_output_data" +elif [[ -n "\\$par_output_data" ]] && [[ ! -d "\\${out_dir}/\\${report_name}_data" ]]; then + echo "WARNING: Data could not be saved because data folder was not generated by multiqc. This could be due to filtering out of modules or samples." +fi + +if [[ -n "\\$par_output_plots" ]] && [[ -d "\\${out_dir}/\\${report_name}_plots" ]]; then + mv "\\${out_dir}/\\${report_name}_plots" "\\$par_output_plots" +elif [[ -n "\\$par_output_plots" ]] && [[ ! -d "\\${out_dir}/\\${report_name}_plots" ]]; then + echo "WARNING: Plots could not be saved because plots folder was not generated by multiqc. This could be due to filtering out of modules or samples." +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/multiqc", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow.config new file mode 100644 index 0000000..3efa32d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'multiqc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\'s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n' + author = 'Dorien Roosen' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow_schema.json new file mode 100644 index 0000000..04c0a3b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow_schema.json @@ -0,0 +1,529 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "multiqc", +"description": "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\u0027s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `data/results`, multiple_sep: `\";\"`. File paths to be searched for analysis results to be included in the report", + "help_text": "Type: List of `file`, required, example: `data/results`, multiple_sep: `\";\"`. File paths to be searched for analysis results to be included in the report.\n" + + } + + +} +}, + + + "ouput" : { + "title": "Ouput", + "type": "object", + "description": "No description", + "properties": { + + + "output_report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_report.html`, example: `multiqc_report.html`. Filepath of the generated report", + "help_text": "Type: `file`, default: `$id.$key.output_report.html`, example: `multiqc_report.html`. Filepath of the generated report.\n" + , + "default":"$id.$key.output_report.html" + } + + + , + "output_data": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_data`, example: `multiqc_data`. Output directory for parsed data files", + "help_text": "Type: `file`, default: `$id.$key.output_data`, example: `multiqc_data`. Output directory for parsed data files. If not provided, parsed data will not be published.\n" + , + "default":"$id.$key.output_data" + } + + + , + "output_plots": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_plots`, example: `multiqc_plots`. Output directory for generated plots", + "help_text": "Type: `file`, default: `$id.$key.output_plots`, example: `multiqc_plots`. Output directory for generated plots. If not provided, plots will not be published.\n" + , + "default":"$id.$key.output_plots" + } + + +} +}, + + + "modules and analyses to run" : { + "title": "Modules and analyses to run", + "type": "object", + "description": "No description", + "properties": { + + + "include_modules": { + "type": + "string", + "description": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Use only these module", + "help_text": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Use only these module" + + } + + + , + "exclude_modules": { + "type": + "string", + "description": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Do not use only these modules", + "help_text": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Do not use only these modules" + + } + + + , + "ignore_analysis": { + "type": + "string", + "description": "Type: List of `string`, example: `run_one/*;run_two/*`, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `string`, example: `run_one/*;run_two/*`, multiple_sep: `\";\"`. " + + } + + + , + "ignore_samples": { + "type": + "string", + "description": "Type: List of `string`, example: `sample_1*;sample_3*`, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `string`, example: `sample_1*;sample_3*`, multiple_sep: `\";\"`. " + + } + + + , + "ignore_symlinks": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore symlinked directories and files", + "help_text": "Type: `boolean_true`, default: `false`. Ignore symlinked directories and files" + , + "default":false + } + + +} +}, + + + "sample name handling" : { + "title": "Sample name handling", + "type": "object", + "description": "No description", + "properties": { + + + "dirs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Prepend directory to sample names to avoid clashing filenames", + "help_text": "Type: `boolean_true`, default: `false`. Prepend directory to sample names to avoid clashing filenames" + , + "default":false + } + + + , + "dirs_depth": { + "type": + "integer", + "description": "Type: `integer`. Prepend n directories to sample names", + "help_text": "Type: `integer`. Prepend n directories to sample names. Negative number to take from start of path." + + } + + + , + "full_names": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not clean the sample names (leave as full file name)", + "help_text": "Type: `boolean_true`, default: `false`. Do not clean the sample names (leave as full file name)" + , + "default":false + } + + + , + "fn_as_s_name": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the log filename as the sample name", + "help_text": "Type: `boolean_true`, default: `false`. Use the log filename as the sample name" + , + "default":false + } + + + , + "replace_names": { + "type": + "string", + "description": "Type: `file`, example: `replace_names.tsv`. TSV file to rename sample names during report generation", + "help_text": "Type: `file`, example: `replace_names.tsv`. TSV file to rename sample names during report generation" + + } + + +} +}, + + + "report customisation" : { + "title": "Report Customisation", + "type": "object", + "description": "No description", + "properties": { + + + "title": { + "type": + "string", + "description": "Type: `string`. Report title", + "help_text": "Type: `string`. Report title. Printed as page header, used for filename if not otherwise specified.\n" + + } + + + , + "comment": { + "type": + "string", + "description": "Type: `string`. Custom comment, will be printed at the top of the report", + "help_text": "Type: `string`. Custom comment, will be printed at the top of the report.\n" + + } + + + , + "template": { + "type": + "string", + "description": "Type: `string`, choices: ``default`, `gathered`, `geo`, `highcharts`, `sections`, `simple``. Report template to use", + "help_text": "Type: `string`, choices: ``default`, `gathered`, `geo`, `highcharts`, `sections`, `simple``. Report template to use.\n", + "enum": ["default", "gathered", "geo", "highcharts", "sections", "simple"] + + + } + + + , + "sample_names": { + "type": + "string", + "description": "Type: `file`, example: `sample_names.tsv`. TSV file containing alternative sample names for renaming buttons in the report", + "help_text": "Type: `file`, example: `sample_names.tsv`. TSV file containing alternative sample names for renaming buttons in the report.\n" + + } + + + , + "sample_filters": { + "type": + "string", + "description": "Type: `file`, example: `sample_filters.tsv`. TSV file containing show/hide patterns for the report\n", + "help_text": "Type: `file`, example: `sample_filters.tsv`. TSV file containing show/hide patterns for the report\n" + + } + + + , + "custom_css_file": { + "type": + "string", + "description": "Type: `file`, example: `custom_style_sheet.css`. Custom CSS file to add to the final report\n", + "help_text": "Type: `file`, example: `custom_style_sheet.css`. Custom CSS file to add to the final report\n" + + } + + + , + "profile_runtime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add analysis of how long MultiQC takes to run to the report\n", + "help_text": "Type: `boolean_true`, default: `false`. Add analysis of how long MultiQC takes to run to the report\n" + , + "default":false + } + + +} +}, + + + "multiqc behaviour" : { + "title": "MultiQC behaviour", + "type": "object", + "description": "No description", + "properties": { + + + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Increase output verbosity", + "help_text": "Type: `boolean_true`, default: `false`. Increase output verbosity.\n" + , + "default":false + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Only show log warnings\n", + "help_text": "Type: `boolean_true`, default: `false`. Only show log warnings\n" + , + "default":false + } + + + , + "strict": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t catch exceptions, run additional code checks to help development", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t catch exceptions, run additional code checks to help development.\n" + , + "default":false + } + + + , + "development": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Development mode", + "help_text": "Type: `boolean_true`, default: `false`. Development mode. Do not compress and minimise JS, export uncompressed plot data.\n" + , + "default":false + } + + + , + "require_logs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Require all explicitly requested modules to have log files", + "help_text": "Type: `boolean_true`, default: `false`. Require all explicitly requested modules to have log files. If not, MultiQC will exit with an error.\n" + , + "default":false + } + + + , + "no_megaqc_upload": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t upload generated report to MegaQC, even if MegaQC options are found", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t upload generated report to MegaQC, even if MegaQC options are found.\n" + , + "default":false + } + + + , + "no_ansi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable coloured log output", + "help_text": "Type: `boolean_true`, default: `false`. Disable coloured log output.\n" + , + "default":false + } + + + , + "cl_config": { + "type": + "string", + "description": "Type: `string`, example: `qualimap_config: { general_stats_coverage: [20,40,200] }`. YAML formatted string that allows to customize MultiQC behaviour like input file detection", + "help_text": "Type: `string`, example: `qualimap_config: { general_stats_coverage: [20,40,200] }`. YAML formatted string that allows to customize MultiQC behaviour like input file detection.\n" + + } + + +} +}, + + + "output format" : { + "title": "Output format", + "type": "object", + "description": "No description", + "properties": { + + + "flat": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use only flat plots (static images)", + "help_text": "Type: `boolean_true`, default: `false`. Use only flat plots (static images).\n" + , + "default":false + } + + + , + "interactive": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use only interactive plots (in-browser Javascript)", + "help_text": "Type: `boolean_true`, default: `false`. Use only interactive plots (in-browser Javascript).\n" + , + "default":false + } + + + , + "data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Force the parsed data directory to be created", + "help_text": "Type: `boolean_true`, default: `false`. Force the parsed data directory to be created.\n" + , + "default":false + } + + + , + "no_data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Prevent the parsed data directory from being created", + "help_text": "Type: `boolean_true`, default: `false`. Prevent the parsed data directory from being created.\n" + , + "default":false + } + + + , + "zip_data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Compress the data directory", + "help_text": "Type: `boolean_true`, default: `false`. Compress the data directory.\n" + , + "default":false + } + + + , + "data_format": { + "type": + "string", + "description": "Type: `string`, choices: ``tsv`, `csv`, `json`, `yaml``. Output parsed data in a different format than the default \u0027txt\u0027", + "help_text": "Type: `string`, choices: ``tsv`, `csv`, `json`, `yaml``. Output parsed data in a different format than the default \u0027txt\u0027.\n", + "enum": ["tsv", "csv", "json", "yaml"] + + + } + + + , + "pdf": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Creates PDF report with the \u0027simple\u0027 template", + "help_text": "Type: `boolean_true`, default: `false`. Creates PDF report with the \u0027simple\u0027 template. Requires Pandoc to be installed.\n" + , + "default":false + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/ouput" + }, + + { + "$ref": "#/definitions/modules and analyses to run" + }, + + { + "$ref": "#/definitions/sample name handling" + }, + + { + "$ref": "#/definitions/report customisation" + }, + + { + "$ref": "#/definitions/multiqc behaviour" + }, + + { + "$ref": "#/definitions/output format" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml new file mode 100644 index 0000000..b89f7db --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml @@ -0,0 +1,305 @@ +name: "qualimap_rnaseq" +namespace: "qualimap" +version: "v0.3.1" +authors: +- name: "Dorien Roosen" + roles: + - "author" + - "maintainer" + info: + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--bam" + description: "Path to the sequence alignment file in BAM format, produced by a\ + \ splicing-aware aligner." + info: null + example: + - "alignment.bam" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to genomic annotations in Ensembl GTF format." + info: null + example: + - "annotations.gtf" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--qc_results" + description: "Text file containing the RNAseq QC results." + info: null + example: + - "rnaseq_qc_results.txt" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts" + description: "Output file for computed counts." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--report" + description: "Report output file. Supported formats are PDF or HTML." + info: null + example: + - "report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Optional" + arguments: + - type: "integer" + name: "--num_pr_bases" + description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\ + \ bias (default = 100)." + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_tr_bias" + description: "Number of top highly expressed transcripts to compute 5'-3' bias\ + \ (default = 1000)." + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--algorithm" + description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)." + info: null + required: false + choices: + - "uniquely-mapped-reads" + - "proportional" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sequencing_protocol" + description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\ + \ or non-strand-specific (default))." + info: null + required: false + choices: + - "non-strand-specific" + - "strand-specific-reverse" + - "strand-specific-forward" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--paired" + description: "Setting this flag for paired-end experiments will result in counting\ + \ fragments instead of reads." + info: null + direction: "input" + - type: "boolean_true" + name: "--sorted" + description: "Setting this flag indicates that the input file is already sorted\ + \ by name. If flag is not set, additional sorting by name will be performed.\ + \ Only requiredfor paired-end analysis." + info: null + direction: "input" + - type: "string" + name: "--java_memory_size" + description: "maximum Java heap memory size, default = 4G." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Qualimap RNA-seq QC reports quality control metrics and bias estimations\ + \ \nwhich are specific for whole transcriptome sequencing, including reads genomic\ + \ \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "RNA-seq" +- "quality control" +- "QC Report" +license: "GPL-2.0" +references: + doi: + - "10.1093/bioinformatics/btv566" +links: + repository: "https://bitbucket.org/kokonech/qualimap/commits/branch/master" + homepage: "http://qualimap.conesalab.org/" + documentation: "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc" + issue_tracker: "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/qualimap:2.3--hdfd78af_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/qualimap/qualimap_rnaseq/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/qualimap/qualimap_rnaseq" + executable: "target/nextflow/qualimap/qualimap_rnaseq/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/main.nf new file mode 100644 index 0000000..7907861 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/main.nf @@ -0,0 +1,3989 @@ +// qualimap_rnaseq v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dorien Roosen (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "qualimap_rnaseq", + "namespace" : "qualimap", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Dorien Roosen", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "dorien@data-intuitive.com", + "github" : "dorien-er", + "linkedin" : "dorien-roosen" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--bam", + "description" : "Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner.", + "example" : [ + "alignment.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "Path to genomic annotations in Ensembl GTF format.", + "example" : [ + "annotations.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--qc_results", + "description" : "Text file containing the RNAseq QC results.", + "example" : [ + "rnaseq_qc_results.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts", + "description" : "Output file for computed counts.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--report", + "description" : "Report output file. Supported formats are PDF or HTML.", + "example" : [ + "report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Optional", + "arguments" : [ + { + "type" : "integer", + "name" : "--num_pr_bases", + "description" : "Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100).", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_tr_bias", + "description" : "Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000).", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--algorithm", + "description" : "Counting algorithm (uniquely-mapped-reads (default) or proportional).", + "required" : false, + "choices" : [ + "uniquely-mapped-reads", + "proportional" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sequencing_protocol", + "description" : "Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).", + "required" : false, + "choices" : [ + "non-strand-specific", + "strand-specific-reverse", + "strand-specific-forward" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "Setting this flag for paired-end experiments will result in counting fragments instead of reads.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sorted", + "description" : "Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only requiredfor paired-end analysis.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--java_memory_size", + "description" : "maximum Java heap memory size, default = 4G.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data/" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "RNA-seq", + "quality control", + "QC Report" + ], + "license" : "GPL-2.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btv566" + ] + }, + "links" : { + "repository" : "https://bitbucket.org/kokonech/qualimap/commits/branch/master", + "homepage" : "http://qualimap.conesalab.org/", + "documentation" : "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc", + "issue_tracker" : "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/qualimap:2.3--hdfd78af_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/qualimap/qualimap_rnaseq/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/qualimap/qualimap_rnaseq", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_QC_RESULTS+x} ]; then echo "${VIASH_PAR_QC_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qc_results='&'#" ; else echo "# par_qc_results="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS+x} ]; then echo "${VIASH_PAR_COUNTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts='&'#" ; else echo "# par_counts="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORT+x} ]; then echo "${VIASH_PAR_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report='&'#" ; else echo "# par_report="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_PR_BASES+x} ]; then echo "${VIASH_PAR_NUM_PR_BASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_pr_bases='&'#" ; else echo "# par_num_pr_bases="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_TR_BIAS+x} ]; then echo "${VIASH_PAR_NUM_TR_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_tr_bias='&'#" ; else echo "# par_num_tr_bias="; fi ) +$( if [ ! -z ${VIASH_PAR_ALGORITHM+x} ]; then echo "${VIASH_PAR_ALGORITHM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_algorithm='&'#" ; else echo "# par_algorithm="; fi ) +$( if [ ! -z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then echo "${VIASH_PAR_SEQUENCING_PROTOCOL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sequencing_protocol='&'#" ; else echo "# par_sequencing_protocol="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_SORTED+x} ]; then echo "${VIASH_PAR_SORTED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sorted='&'#" ; else echo "# par_sorted="; fi ) +$( if [ ! -z ${VIASH_PAR_JAVA_MEMORY_SIZE+x} ]; then echo "${VIASH_PAR_JAVA_MEMORY_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_java_memory_size='&'#" ; else echo "# par_java_memory_size="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" qualimap_XXXXXXXXX) + +# Handle output parameters +if [ -n "\\$par_report" ]; then + outfile=\\$(basename "\\$par_report") + report_extension="\\${outfile##*.}" +fi + +if [ -n "\\$par_counts" ]; then + counts=\\$(basename "\\$par_counts") +fi + +# disable flags +[[ "\\$par_paired" == "false" ]] && unset par_paired +[[ "\\$par_sorted" == "false" ]] && unset par_sorted + +# Run qualimap +qualimap rnaseq \\\\ + \\${meta_memory_mb:+--java-mem-size=\\${meta_memory_mb}M} \\\\ + \\${par_algorithm:+--algorithm \\$par_algorithm} \\\\ + \\${par_sequencing_protocol:+--sequencing-protocol \\$par_sequencing_protocol} \\\\ + -bam \\$par_bam \\\\ + -gtf \\$par_gtf \\\\ + -outdir "\\$tmp_dir" \\\\ + \\${par_num_pr_bases:+--num-pr-bases \\$par_num_pr_bases} \\\\ + \\${par_num_tr_bias:+--num-tr-bias \\$par_num_tr_bias} \\\\ + \\${par_report:+-outformat \\$report_extension} \\\\ + \\${par_paired:+--paired} \\\\ + \\${par_sorted:+--sorted} \\\\ + \\${par_report:+-outfile "\\$outfile"} \\\\ + \\${par_counts:+-oc "\\$counts"} + +# Move output files +mv "\\$tmp_dir/rnaseq_qc_results.txt" "\\$par_qc_results" + +if [ -n "\\$par_report" ] && [ \\$report_extension = "html" ]; then + mv "\\$tmp_dir/qualimapReport.html" "\\$par_report" +fi + +if [ -n "\\$par_report" ] && [ \\$report_extension = "pdf" ]; then + mv "\\$tmp_dir/\\$outfile" "\\$par_report" +fi + +if [ -n "\\$par_counts" ]; then + mv "\\$tmp_dir/\\$counts" "\\$par_counts" +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/qualimap/qualimap_rnaseq", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow.config new file mode 100644 index 0000000..32c93b7 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'qualimap/qualimap_rnaseq' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n' + author = 'Dorien Roosen' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json new file mode 100644 index 0000000..47e34c9 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json @@ -0,0 +1,217 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "qualimap_rnaseq", +"description": "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5\u2019-3\u2019 bias computation.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "bam": { + "type": + "string", + "description": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner", + "help_text": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner." + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format", + "help_text": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "qc_results": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. Text file containing the RNAseq QC results", + "help_text": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. Text file containing the RNAseq QC results." + , + "default":"$id.$key.qc_results.txt" + } + + + , + "counts": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts`. Output file for computed counts", + "help_text": "Type: `file`, default: `$id.$key.counts`. Output file for computed counts." + , + "default":"$id.$key.counts" + } + + + , + "report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file", + "help_text": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file. Supported formats are PDF or HTML." + , + "default":"$id.$key.report.html" + } + + +} +}, + + + "optional" : { + "title": "Optional", + "type": "object", + "description": "No description", + "properties": { + + + "num_pr_bases": { + "type": + "integer", + "description": "Type: `integer`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)", + "help_text": "Type: `integer`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)." + + } + + + , + "num_tr_bias": { + "type": + "integer", + "description": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)", + "help_text": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)." + + } + + + , + "algorithm": { + "type": + "string", + "description": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional)", + "help_text": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional).", + "enum": ["uniquely-mapped-reads", "proportional"] + + + } + + + , + "sequencing_protocol": { + "type": + "string", + "description": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))", + "help_text": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).", + "enum": ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"] + + + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads", + "help_text": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads." + , + "default":false + } + + + , + "sorted": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name", + "help_text": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only requiredfor paired-end analysis." + , + "default":false + } + + + , + "java_memory_size": { + "type": + "string", + "description": "Type: `string`. maximum Java heap memory size, default = 4G", + "help_text": "Type: `string`. maximum Java heap memory size, default = 4G." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/optional" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml new file mode 100644 index 0000000..d4926d5 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml @@ -0,0 +1,893 @@ +name: "rsem_calculate_expression" +namespace: "rsem" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "Sample ID." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--paired" + description: "Paired-end reads or not?" + info: null + direction: "input" + - type: "file" + name: "--input" + description: "Input reads for quantification." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--index" + description: "RSEM index." + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_args" + description: "Extra rsem-calculate-expression arguments in addition to the examples." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--counts_gene" + description: "Expression counts on gene level" + info: null + example: + - "$id.genes.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcripts" + description: "Expression counts on transcript level" + info: null + example: + - "$id.isoforms.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stat" + description: "RSEM statistics" + info: null + example: + - "$id.stat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--logs" + description: "RSEM logs" + info: null + example: + - "$id.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_star" + description: "BAM file generated by STAR (optional)" + info: null + example: + - "$id.STAR.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_genome" + description: "Genome BAM file (optional)" + info: null + example: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_transcript" + description: "Transcript BAM file (optional)" + info: null + example: + - "$id.transcript.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sort_bam_by_read_name" + description: "Sort BAM file aligned under transcript coordidate by read name.\ + \ Setting this option on will produce \ndeterministic maximum likelihood estimations\ + \ from independent runs. Note that sorting will take long \ntime and lots of\ + \ memory.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_bam_output" + description: "Do not output any BAM file." + info: null + direction: "input" + - type: "boolean_true" + name: "--sampling_for_bam" + description: "When RSEM generates a BAM file, instead of outputting all alignments\ + \ a read has with their posterior \nprobabilities, one alignment is sampled\ + \ according to the posterior probabilities. The sampling procedure \nincludes\ + \ the alignment to the \"noise\" transcript, which does not appear in the BAM\ + \ file. Only the \nsampled alignment has a weight of 1. All other alignments\ + \ have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared\ + \ in the BAM file should have weight 0.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--output_genome_bam" + description: "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped\ + \ to genomic coordinates and \nannotated with their posterior probabilities.\ + \ In addition, RSEM will call samtools (included in RSEM \npackage) to sort\ + \ and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai'\ + \ \nwill be generated.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--sort_bam_by_coordinate" + description: "Sort RSEM generated transcript and genome BAM files by coordinates\ + \ and build associated indices.\n" + info: null + direction: "input" +- name: "Basic Options" + arguments: + - type: "boolean_true" + name: "--no_qualities" + description: "Input reads do not contain quality scores." + info: null + direction: "input" + - type: "boolean_true" + name: "--alignments" + description: "Input file contains alignments in SAM/BAM/CRAM format. The exact\ + \ file format will be determined \nautomatically.\n" + info: null + direction: "input" + - type: "file" + name: "--fai" + description: "If the header section of input alignment file does not contain reference\ + \ sequence information, \nthis option should be turned on. is a FAI format\ + \ file containing each reference sequence's \nname and length. Please refer\ + \ to the SAM official website for the details of FAI format.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bowtie2" + description: "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM\ + \ does not handle indel, local \nand discordant alignments, the Bowtie2 parameters\ + \ are set in a way to avoid those alignments. In \nparticular, we use options\ + \ '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1'\ + \ \nby default. The last parameter of '--score_min', '-0.1', is the negative\ + \ of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'.\ + \ If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--star" + description: "Use STAR to align reads. Alignment parameters are from ENCODE3's\ + \ STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's\ + \ Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory\ + \ with name as 'sample_name.bam'. Each STAR job will have its own private copy\ + \ of \nthe genome in memory.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--hisat2_hca" + description: "Use HISAT2 to align reads to the transcriptome according to Human\ + \ Cell Atlast.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--append_names" + description: "If gene_name/transcript_name is available, append it to the end\ + \ of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results'\ + \ and 'sample_name.genes.results'.\n" + info: null + direction: "input" + - type: "integer" + name: "--seed" + description: "Set the seed for the random number generators used in calculating\ + \ posterior mean estimates and \ncredibility intervals. The seed must be a non-negative\ + \ 32 bit integer.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--single_cell_prior" + description: "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior\ + \ mean estimates and credibility \nintervals. However, much less genes are expressed\ + \ in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean\ + \ estimates and/or credibility intervals and you have single-cell RNA-Seq data,\ + \ \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1)\ + \ as the prior which \nencourage the sparsity of the expression levels.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--calc_pme" + description: "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates." + info: null + direction: "input" + - type: "boolean_true" + name: "--calc_ci" + description: "Calculate 95% credibility intervals and posterior mean estimates.\ + \ The credibility level can be \nchanged by setting '--ci_credibility_level'.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--quiet" + alternatives: + - "-q" + description: "Suppress the output of logging information." + info: null + direction: "input" +- name: "Aligner Options" + arguments: + - type: "integer" + name: "--seed_length" + description: "Seed length used by the read aligner. Providing the correct value\ + \ is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's\ + \ seed length parameter. Any read with its or at least \none of its mates' (for\ + \ paired-end reads) length less than this value will be ignored. If the \nreferences\ + \ are not added poly(A) tails, the minimum allowed value is 5, otherwise, the\ + \ minimum \nallowed value is 25. Note that this script will only check if the\ + \ value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default:\ + \ 25)\n" + info: null + example: + - 25 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--phred64_quals" + description: "Input quality scores are encoded as Phred+64 (default for GA Pipeline\ + \ ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise,\ + \ quality score will be encoded as Phred+33. (Default: false)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--solexa_quals" + description: "Input quality scores are solexa encoded (from GA Pipeline ver. <\ + \ 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality\ + \ score will be encoded as Phred+33. (Default: false)\n" + info: null + direction: "input" + - type: "integer" + name: "--bowtie_n" + description: "(Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,\ + \ Default: 2)\n" + info: null + example: + - 2 + required: false + choices: + - 0 + - 1 + - 2 + - 3 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie_e" + description: "(Bowtie parameter) max sum of mismatch quality scores across the\ + \ alignment. (Default: 99999999)\n" + info: null + example: + - 99999999 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie_m" + description: "(Bowtie parameter) suppress all alignments for a read if > \ + \ valid alignments exist. (Default: 200)\n" + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie_chunkmbs" + description: "(Bowtie parameter) memory allocated for best first alignment calculation\ + \ (Default: 0 - use Bowtie's default)\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--bowtie2_mismatch_rate" + description: "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default:\ + \ 0.1)\n" + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie2_k" + description: "(Bowtie 2 parameter) Find up to alignments per read. (Default:\ + \ 200)\n" + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--bowtie2_sensitivity_level" + description: "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end\ + \ mode. This option controls how \nhard Bowtie 2 tries to find alignments. \ + \ must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\"\ + . The four candidates correspond to Bowtie 2's \"--very-fast\", \"--fast\",\ + \ \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\"\ + \ - use Bowtie 2's default)\n" + info: null + example: + - "sensitive" + required: false + choices: + - "very_fast" + - "fast" + - "sensitive" + - "very_sensitive" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--star_gzipped_read_file" + description: "Input read file(s) is compressed by gzip. (Default: false)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--star_bzipped_read_file" + description: "Input read file(s) is compressed by bzip2. (Default: false)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--star_output_genome_bam" + description: "Save the BAM file from STAR alignment under genomic coordinate to\ + \ 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate.\ + \ In this file, according to STAR's manual, 'paired \nends of an alignment are\ + \ always adjacent, and multiple alignments of a read are adjacent as well'.\ + \ \n(Default: false)\n" + info: null + direction: "input" +- name: "Advanced Options" + arguments: + - type: "string" + name: "--tag" + description: "The name of the optional field used in the SAM input for identifying\ + \ a read with too many valid \nalignments. The field should have the format\ + \ :i:, where a bigger than 0 \nindicates a read with\ + \ too many alignments. (Default: \"\")\n" + info: null + example: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fragment_length_min" + description: "Minimum read/insert length allowed. This is also the value for the\ + \ Bowtie/Bowtie2 -I option. \n(Default: 1)\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fragment_length_max" + description: "Maximum read/insert length allowed. This is also the value for the\ + \ Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fragment_length_mean" + description: "(single-end data only) The mean of the fragment length distribution,\ + \ which is assumed to be a \nGaussian. (Default: -1, which disables use of the\ + \ fragment length distribution)\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--gragment_length_sd" + description: "(single-end data only) The standard deviation of the fragment length\ + \ distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes\ + \ that all fragments are of the same length, \ngiven by the rounded value of\ + \ --fragment_length_mean).\n" + info: null + example: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--estimate_rspd" + description: "Set this option if you want to estimate the read start position\ + \ distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n" + info: null + direction: "input" + - type: "integer" + name: "--num_rspd_bins" + description: "Number of bins in the RSPD. Only relevant when '--estimate_rspd'\ + \ is specified. Use of the default \nsetting is recommended. (Default: 20)\n" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gibbs_burnin" + description: "The number of burn-in rounds for RSEM's Gibbs sampler. Each round\ + \ passes over the entire data set \nonce. If RSEM can use multiple threads,\ + \ multiple Gibbs samplers will start at the same time and all \nsamplers share\ + \ the same burn-in number. (Default: 200)\n" + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gibbs_number_of_samples" + description: "The total number of count vectors RSEM will collect from its Gibbs\ + \ samplers. (Default: 1000)\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gibbs_sampling_gap" + description: "The number of rounds between two succinct count vectors RSEM collects.\ + \ If the count vector after \nround N is collected, the count vector after round\ + \ N + will also be collected. (Default: 1)\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--ci_credibility_level" + description: "The credibility level for credibility intervals. (Default: 0.95)\n" + info: null + example: + - 0.95 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--ci_number_of_samples_per_count_vector" + description: "The number of read generating probability vectors sampled per sampled\ + \ count vector. The crebility \nintervals are calculated by first sampling P(C\ + \ | D) and then sampling P(Theta | C) for each sampled \ncount vector. This\ + \ option controls how many Theta vectors are sampled per sampled count vector.\ + \ \n(Default: 50)\n" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--keep_intermediate_files" + description: "Keep temporary files generated by RSEM. RSEM creates a temporary\ + \ directory, 'sample_name.temp', \ninto which it puts all intermediate output\ + \ files. If this directory already exists, RSEM overwrites \nall files generated\ + \ by previous RSEM runs inside of it. By default, after RSEM finishes, the \n\ + temporary directory is deleted. Set this option to prevent the deletion of this\ + \ directory and the \nintermediate files inside of it.\n" + info: null + direction: "input" + - type: "string" + name: "--temporary_folder" + description: "Set where to put the temporary files generated by RSEM. If the folder\ + \ specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n" + info: null + example: + - "sample_name.temp" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--time" + description: "Output time consumed by each step of RSEM to 'sample_name.time'.\n" + info: null + direction: "input" +- name: "Prior-Enhanced RSEM Options" + arguments: + - type: "boolean_true" + name: "--run_pRSEM" + description: "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's\ + \ initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input\ + \ RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq\ + \ peak information to partition isoforms (e.g. in pRSEM's default \npartition\ + \ model), either ChIP-seq peak file (with the '--chipseq_peak_file' option)\ + \ or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables\ + \ are required (with the \n'--chipseq_target_read_files ', '--chipseq_control_read_files\ + \ ', and '--bowtie_path \n options), otherwise, ChIP-seq FASTQ\ + \ files for target and control and the path to Bowtie \nexecutables are required.\n" + info: null + direction: "input" + - type: "file" + name: "--chipseq_peak_file" + description: "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4,\ + \ format. This file is used \nwhen running prior-enhanced RSEM in the default\ + \ two-partition model. It partitions isoforms by \nwhether they have ChIP-seq\ + \ overlapping with their transcription start site region or not. Each \npartition\ + \ will have its own prior parameter learned from a training set. This file can\ + \ be either \ngzipped or ungzipped.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_target_read_files" + description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq target.\ + \ This option is used when running \nprior-enhanced RSEM. It provides information\ + \ to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped\ + \ or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path '\ + \ \nand '--chipseq_control_read_files ' must be defined when this option\ + \ is specified.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_control_read_files" + description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq conrol.\ + \ This option is used when running \nprior-enhanced RSEM. It provides information\ + \ to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with\ + \ a suffix '.gz' or '.gzip'. The options '--bowtie_path ' and \n'--chipseq_target_read_files\ + \ ' must be defined when this option is specified.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_read_files_multi_targets" + description: "Comma-separated full path of FASTQ read files for multiple ChIP-seq\ + \ targets. This option is used when \nrunning prior-enhanced RSEM, where prior\ + \ is learned from multiple complementary data sets. It provides \ninformation\ + \ to calculate ChIP-seq signals. All files can be either ungzipped or gzipped\ + \ with a suffix \n'.gz' or '.gzip'. When this option is specified, the option\ + \ '--bowtie_path ' must be defined and \nthe option '--partition_model\ + \ ' will be set to 'cmb_lgt' automatically.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_bed_files_multi_targets" + description: "Comma-separated full path of BED files for multiple ChIP-seq targets.\ + \ This option is used when running \nprior-enhanced RSEM, where prior is learned\ + \ from multiple complementary data sets. It provides information \nof ChIP-seq\ + \ signals and must have at least the first six BED columns. All files can be\ + \ either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option\ + \ is specified, the option '--partition_model \n' will be set to 'cmb_lgt'\ + \ automatically.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--cap_stacked_chipseq_reads" + description: "Keep a maximum number of ChIP-seq reads that aligned to the same\ + \ genomic interval. This option is used \nwhen running prior-enhanced RSEM,\ + \ where prior is learned from multiple complementary data sets. This \noption\ + \ is only in use when either '--chipseq_read_files_multi_targets ' or\ + \ \n'--chipseq_bed_files_multi_targets ' is specified.\n" + info: null + direction: "input" + - type: "integer" + name: "--n_max_stacked_chipseq_reads" + description: "The maximum number of stacked ChIP-seq reads to keep. This option\ + \ is used when running prior-enhanced \nRSEM, where prior is learned from multiple\ + \ complementary data sets. This option is only in use when the \noption '--cap_stacked_chipseq_reads'\ + \ is set.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--partition_model" + description: "A keyword to specify the partition model used by prior-enhanced\ + \ RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3,\ + \ lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk,\ + \ pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above\ + \ models are learned from a training set. For detailed explanations, please\ + \ \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n" + info: null + example: + - "pk" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Calculate expression with RSEM. \n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "Transcriptome" +- "Index" +- "Alignment" +- "RSEM" +license: "GPL-3.0" +references: + doi: + - "https://doi.org/10.1186/1471-2105-12-323" +links: + repository: "https://github.com/deweylab/RSEM" + homepage: "https://deweylab.github.io/RSEM/" + documentation: "https://deweylab.github.io/RSEM/rsem-calculate-expression.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "build-essential" + - "gcc" + - "g++" + - "make" + - "wget" + - "zlib1g-dev" + - "unzip" + interactive: false + - type: "docker" + run: + - "apt-get update && \\\napt-get clean && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip\ + \ && \\\nunzip 2.7.11a.zip && \\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR\ + \ /usr/local/bin && \\\ncd && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip\ + \ && \\\nunzip v1.3.3.zip && \\\ncd RSEM-1.3.3 && \\\nmake && \\\nmake install\n" + env: + - "STAR_VERSION=2.7.11b" + - "RSEM_VERSION=1.3.3" + - type: "docker" + run: + - "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\ + \ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\ + \ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\ + \ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\ + \ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\ + d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\ + \ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rsem/rsem_calculate_expression/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rsem/rsem_calculate_expression" + executable: "target/nextflow/rsem/rsem_calculate_expression/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/main.nf new file mode 100644 index 0000000..2ffa405 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/main.nf @@ -0,0 +1,4622 @@ +// rsem_calculate_expression v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rsem_calculate_expression", + "namespace" : "rsem", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "Sample ID.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity. Must be one of unstranded, forward, reverse", + "required" : false, + "choices" : [ + "forward", + "reverse", + "unstranded" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "Paired-end reads or not?", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Input reads for quantification.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--index", + "description" : "RSEM index.", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_args", + "description" : "Extra rsem-calculate-expression arguments in addition to the examples.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--counts_gene", + "description" : "Expression counts on gene level", + "example" : [ + "$id.genes.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcripts", + "description" : "Expression counts on transcript level", + "example" : [ + "$id.isoforms.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stat", + "description" : "RSEM statistics", + "example" : [ + "$id.stat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--logs", + "description" : "RSEM logs", + "example" : [ + "$id.log" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_star", + "description" : "BAM file generated by STAR (optional)", + "example" : [ + "$id.STAR.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_genome", + "description" : "Genome BAM file (optional)", + "example" : [ + "$id.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_transcript", + "description" : "Transcript BAM file (optional)", + "example" : [ + "$id.transcript.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sort_bam_by_read_name", + "description" : "Sort BAM file aligned under transcript coordidate by read name. Setting this option on will produce \ndeterministic maximum likelihood estimations from independent runs. Note that sorting will take long \ntime and lots of memory.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_bam_output", + "description" : "Do not output any BAM file.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sampling_for_bam", + "description" : "When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure \nincludes the alignment to the \\"noise\\" transcript, which does not appear in the BAM file. Only the \nsampled alignment has a weight of 1. All other alignments have weight 0. If the \\"noise\\" transcript is \nsampled, all alignments appeared in the BAM file should have weight 0.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--output_genome_bam", + "description" : "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped to genomic coordinates and \nannotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM \npackage) to sort and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai' \nwill be generated.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sort_bam_by_coordinate", + "description" : "Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Basic Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--no_qualities", + "description" : "Input reads do not contain quality scores.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--alignments", + "description" : "Input file contains alignments in SAM/BAM/CRAM format. The exact file format will be determined \nautomatically.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--fai", + "description" : "If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on. is a FAI format file containing each reference sequence's \nname and length. Please refer to the SAM official website for the details of FAI format.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--bowtie2", + "description" : "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM does not handle indel, local \nand discordant alignments, the Bowtie2 parameters are set in a way to avoid those alignments. In \nparticular, we use options '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1' \nby default. The last parameter of '--score_min', '-0.1', is the negative of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'. If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--star", + "description" : "Use STAR to align reads. Alignment parameters are from ENCODE3's STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory with name as 'sample_name.bam'. Each STAR job will have its own private copy of \nthe genome in memory.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--hisat2_hca", + "description" : "Use HISAT2 to align reads to the transcriptome according to Human Cell Atlast.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--append_names", + "description" : "If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results' and 'sample_name.genes.results'.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--seed", + "description" : "Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals. The seed must be a non-negative 32 bit integer.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--single_cell_prior", + "description" : "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals. However, much less genes are expressed in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1) as the prior which \nencourage the sparsity of the expression levels.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--calc_pme", + "description" : "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--calc_ci", + "description" : "Calculate 95% credibility intervals and posterior mean estimates. The credibility level can be \nchanged by setting '--ci_credibility_level'.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "alternatives" : [ + "-q" + ], + "description" : "Suppress the output of logging information.", + "direction" : "input" + } + ] + }, + { + "name" : "Aligner Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--seed_length", + "description" : "Seed length used by the read aligner. Providing the correct value is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's seed length parameter. Any read with its or at least \none of its mates' (for paired-end reads) length less than this value will be ignored. If the \nreferences are not added poly(A) tails, the minimum allowed value is 5, otherwise, the minimum \nallowed value is 25. Note that this script will only check if the value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default: 25)\n", + "example" : [ + 25 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--phred64_quals", + "description" : "Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--solexa_quals", + "description" : "Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--bowtie_n", + "description" : "(Bowtie parameter) max # of mismatches in the seed. (Range: 0-3, Default: 2)\n", + "example" : [ + 2 + ], + "required" : false, + "choices" : [ + 0, + 1, + 2, + 3 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie_e", + "description" : "(Bowtie parameter) max sum of mismatch quality scores across the alignment. (Default: 99999999)\n", + "example" : [ + 99999999 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie_m", + "description" : "(Bowtie parameter) suppress all alignments for a read if > valid alignments exist. (Default: 200)\n", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie_chunkmbs", + "description" : "(Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie's default)\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--bowtie2_mismatch_rate", + "description" : "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default: 0.1)\n", + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie2_k", + "description" : "(Bowtie 2 parameter) Find up to alignments per read. (Default: 200)\n", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--bowtie2_sensitivity_level", + "description" : "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end mode. This option controls how \nhard Bowtie 2 tries to find alignments. must be one of \\"very_fast\\", \\"fast\\", \\"sensitive\\" \nand \\"very_sensitive\\". The four candidates correspond to Bowtie 2's \\"--very-fast\\", \\"--fast\\", \n\\"--sensitive\\" and \\"--very-sensitive\\" options. (Default: \\"sensitive\\" - use Bowtie 2's default)\n", + "example" : [ + "sensitive" + ], + "required" : false, + "choices" : [ + "very_fast", + "fast", + "sensitive", + "very_sensitive" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--star_gzipped_read_file", + "description" : "Input read file(s) is compressed by gzip. (Default: false)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--star_bzipped_read_file", + "description" : "Input read file(s) is compressed by bzip2. (Default: false)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--star_output_genome_bam", + "description" : "Save the BAM file from STAR alignment under genomic coordinate to 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate. In this file, according to STAR's manual, 'paired \nends of an alignment are always adjacent, and multiple alignments of a read are adjacent as well'. \n(Default: false)\n", + "direction" : "input" + } + ] + }, + { + "name" : "Advanced Options", + "arguments" : [ + { + "type" : "string", + "name" : "--tag", + "description" : "The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments. The field should have the format :i:, where a bigger than 0 \nindicates a read with too many alignments. (Default: \\"\\")\n", + "example" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fragment_length_min", + "description" : "Minimum read/insert length allowed. This is also the value for the Bowtie/Bowtie2 -I option. \n(Default: 1)\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fragment_length_max", + "description" : "Maximum read/insert length allowed. This is also the value for the Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fragment_length_mean", + "description" : "(single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian. (Default: -1, which disables use of the fragment length distribution)\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--gragment_length_sd", + "description" : "(single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes that all fragments are of the same length, \ngiven by the rounded value of --fragment_length_mean).\n", + "example" : [ + 0.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--estimate_rspd", + "description" : "Set this option if you want to estimate the read start position distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_rspd_bins", + "description" : "Number of bins in the RSPD. Only relevant when '--estimate_rspd' is specified. Use of the default \nsetting is recommended. (Default: 20)\n", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gibbs_burnin", + "description" : "The number of burn-in rounds for RSEM's Gibbs sampler. Each round passes over the entire data set \nonce. If RSEM can use multiple threads, multiple Gibbs samplers will start at the same time and all \nsamplers share the same burn-in number. (Default: 200)\n", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gibbs_number_of_samples", + "description" : "The total number of count vectors RSEM will collect from its Gibbs samplers. (Default: 1000)\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gibbs_sampling_gap", + "description" : "The number of rounds between two succinct count vectors RSEM collects. If the count vector after \nround N is collected, the count vector after round N + will also be collected. (Default: 1)\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--ci_credibility_level", + "description" : "The credibility level for credibility intervals. (Default: 0.95)\n", + "example" : [ + 0.95 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--ci_number_of_samples_per_count_vector", + "description" : "The number of read generating probability vectors sampled per sampled count vector. The crebility \nintervals are calculated by first sampling P(C | D) and then sampling P(Theta | C) for each sampled \ncount vector. This option controls how many Theta vectors are sampled per sampled count vector. \n(Default: 50)\n", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--keep_intermediate_files", + "description" : "Keep temporary files generated by RSEM. RSEM creates a temporary directory, 'sample_name.temp', \ninto which it puts all intermediate output files. If this directory already exists, RSEM overwrites \nall files generated by previous RSEM runs inside of it. By default, after RSEM finishes, the \ntemporary directory is deleted. Set this option to prevent the deletion of this directory and the \nintermediate files inside of it.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--temporary_folder", + "description" : "Set where to put the temporary files generated by RSEM. If the folder specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n", + "example" : [ + "sample_name.temp" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--time", + "description" : "Output time consumed by each step of RSEM to 'sample_name.time'.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Prior-Enhanced RSEM Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--run_pRSEM", + "description" : "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq peak information to partition isoforms (e.g. in pRSEM's default \npartition model), either ChIP-seq peak file (with the '--chipseq_peak_file' option) or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables are required (with the \n'--chipseq_target_read_files ', '--chipseq_control_read_files ', and '--bowtie_path \n options), otherwise, ChIP-seq FASTQ files for target and control and the path to Bowtie \nexecutables are required.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--chipseq_peak_file", + "description" : "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4, format. This file is used \nwhen running prior-enhanced RSEM in the default two-partition model. It partitions isoforms by \nwhether they have ChIP-seq overlapping with their transcription start site region or not. Each \npartition will have its own prior parameter learned from a training set. This file can be either \ngzipped or ungzipped.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_target_read_files", + "description" : "Comma-separated full path of FASTQ read file(s) for ChIP-seq target. This option is used when running \nprior-enhanced RSEM. It provides information to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path ' \nand '--chipseq_control_read_files ' must be defined when this option is specified.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_control_read_files", + "description" : "Comma-separated full path of FASTQ read file(s) for ChIP-seq conrol. This option is used when running \nprior-enhanced RSEM. It provides information to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path ' and \n'--chipseq_target_read_files ' must be defined when this option is specified.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_read_files_multi_targets", + "description" : "Comma-separated full path of FASTQ read files for multiple ChIP-seq targets. This option is used when \nrunning prior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides \ninformation to calculate ChIP-seq signals. All files can be either ungzipped or gzipped with a suffix \n'.gz' or '.gzip'. When this option is specified, the option '--bowtie_path ' must be defined and \nthe option '--partition_model ' will be set to 'cmb_lgt' automatically.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_bed_files_multi_targets", + "description" : "Comma-separated full path of BED files for multiple ChIP-seq targets. This option is used when running \nprior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides information \nof ChIP-seq signals and must have at least the first six BED columns. All files can be either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option is specified, the option '--partition_model \n' will be set to 'cmb_lgt' automatically.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--cap_stacked_chipseq_reads", + "description" : "Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval. This option is used \nwhen running prior-enhanced RSEM, where prior is learned from multiple complementary data sets. This \noption is only in use when either '--chipseq_read_files_multi_targets ' or \n'--chipseq_bed_files_multi_targets ' is specified.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--n_max_stacked_chipseq_reads", + "description" : "The maximum number of stacked ChIP-seq reads to keep. This option is used when running prior-enhanced \nRSEM, where prior is learned from multiple complementary data sets. This option is only in use when the \noption '--cap_stacked_chipseq_reads' is set.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--partition_model", + "description" : "A keyword to specify the partition model used by prior-enhanced RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3, lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk, pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above models are learned from a training set. For detailed explanations, please \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n", + "example" : [ + "pk" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Calculate expression with RSEM. \n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Transcriptome", + "Index", + "Alignment", + "RSEM" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "https://doi.org/10.1186/1471-2105-12-323" + ] + }, + "links" : { + "repository" : "https://github.com/deweylab/RSEM", + "homepage" : "https://deweylab.github.io/RSEM/", + "documentation" : "https://deweylab.github.io/RSEM/rsem-calculate-expression.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "build-essential", + "gcc", + "g++", + "make", + "wget", + "zlib1g-dev", + "unzip" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get clean && \\\\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip && \\\\\nunzip 2.7.11a.zip && \\\\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR /usr/local/bin && \\\\\ncd && \\\\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip && \\\\\nunzip v1.3.3.zip && \\\\\ncd RSEM-1.3.3 && \\\\\nmake && \\\\\nmake install\n" + ], + "env" : [ + "STAR_VERSION=2.7.11b", + "RSEM_VERSION=1.3.3" + ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g'`\\" > /var/software_versions.txt && \\\\\necho \\"STAR: `STAR --version`\\" >> /var/software_versions.txt && \\\\\necho \\"bowtie2: `bowtie2 --version | grep -oP '\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt && \\\\\necho \\"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\\\K\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt && \\\\\necho \\"HISAT2: `hisat2 --version | grep -oP 'hisat2-align-s version \\\\K\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rsem/rsem_calculate_expression/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rsem/rsem_calculate_expression", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi ) +$( if [ ! -z ${VIASH_PAR_STAT+x} ]; then echo "${VIASH_PAR_STAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stat='&'#" ; else echo "# par_stat="; fi ) +$( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM_STAR+x} ]; then echo "${VIASH_PAR_BAM_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_star='&'#" ; else echo "# par_bam_star="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM_GENOME+x} ]; then echo "${VIASH_PAR_BAM_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_genome='&'#" ; else echo "# par_bam_genome="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_BAM_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_transcript='&'#" ; else echo "# par_bam_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_SORT_BAM_BY_READ_NAME+x} ]; then echo "${VIASH_PAR_SORT_BAM_BY_READ_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sort_bam_by_read_name='&'#" ; else echo "# par_sort_bam_by_read_name="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_BAM_OUTPUT+x} ]; then echo "${VIASH_PAR_NO_BAM_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_bam_output='&'#" ; else echo "# par_no_bam_output="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_FOR_BAM+x} ]; then echo "${VIASH_PAR_SAMPLING_FOR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sampling_for_bam='&'#" ; else echo "# par_sampling_for_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_GENOME_BAM+x} ]; then echo "${VIASH_PAR_OUTPUT_GENOME_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_genome_bam='&'#" ; else echo "# par_output_genome_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_SORT_BAM_BY_COORDINATE+x} ]; then echo "${VIASH_PAR_SORT_BAM_BY_COORDINATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sort_bam_by_coordinate='&'#" ; else echo "# par_sort_bam_by_coordinate="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_QUALITIES+x} ]; then echo "${VIASH_PAR_NO_QUALITIES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_qualities='&'#" ; else echo "# par_no_qualities="; fi ) +$( if [ ! -z ${VIASH_PAR_ALIGNMENTS+x} ]; then echo "${VIASH_PAR_ALIGNMENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_alignments='&'#" ; else echo "# par_alignments="; fi ) +$( if [ ! -z ${VIASH_PAR_FAI+x} ]; then echo "${VIASH_PAR_FAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fai='&'#" ; else echo "# par_fai="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2+x} ]; then echo "${VIASH_PAR_BOWTIE2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2='&'#" ; else echo "# par_bowtie2="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR+x} ]; then echo "${VIASH_PAR_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star='&'#" ; else echo "# par_star="; fi ) +$( if [ ! -z ${VIASH_PAR_HISAT2_HCA+x} ]; then echo "${VIASH_PAR_HISAT2_HCA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hisat2_hca='&'#" ; else echo "# par_hisat2_hca="; fi ) +$( if [ ! -z ${VIASH_PAR_APPEND_NAMES+x} ]; then echo "${VIASH_PAR_APPEND_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_append_names='&'#" ; else echo "# par_append_names="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi ) +$( if [ ! -z ${VIASH_PAR_SINGLE_CELL_PRIOR+x} ]; then echo "${VIASH_PAR_SINGLE_CELL_PRIOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single_cell_prior='&'#" ; else echo "# par_single_cell_prior="; fi ) +$( if [ ! -z ${VIASH_PAR_CALC_PME+x} ]; then echo "${VIASH_PAR_CALC_PME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_calc_pme='&'#" ; else echo "# par_calc_pme="; fi ) +$( if [ ! -z ${VIASH_PAR_CALC_CI+x} ]; then echo "${VIASH_PAR_CALC_CI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_calc_ci='&'#" ; else echo "# par_calc_ci="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED_LENGTH+x} ]; then echo "${VIASH_PAR_SEED_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed_length='&'#" ; else echo "# par_seed_length="; fi ) +$( if [ ! -z ${VIASH_PAR_PHRED64_QUALS+x} ]; then echo "${VIASH_PAR_PHRED64_QUALS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_phred64_quals='&'#" ; else echo "# par_phred64_quals="; fi ) +$( if [ ! -z ${VIASH_PAR_SOLEXA_QUALS+x} ]; then echo "${VIASH_PAR_SOLEXA_QUALS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_solexa_quals='&'#" ; else echo "# par_solexa_quals="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE_N+x} ]; then echo "${VIASH_PAR_BOWTIE_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_n='&'#" ; else echo "# par_bowtie_n="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE_E+x} ]; then echo "${VIASH_PAR_BOWTIE_E}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_e='&'#" ; else echo "# par_bowtie_e="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE_M+x} ]; then echo "${VIASH_PAR_BOWTIE_M}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_m='&'#" ; else echo "# par_bowtie_m="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE_CHUNKMBS+x} ]; then echo "${VIASH_PAR_BOWTIE_CHUNKMBS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_chunkmbs='&'#" ; else echo "# par_bowtie_chunkmbs="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2_MISMATCH_RATE+x} ]; then echo "${VIASH_PAR_BOWTIE2_MISMATCH_RATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2_mismatch_rate='&'#" ; else echo "# par_bowtie2_mismatch_rate="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2_K+x} ]; then echo "${VIASH_PAR_BOWTIE2_K}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2_k='&'#" ; else echo "# par_bowtie2_k="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2_SENSITIVITY_LEVEL+x} ]; then echo "${VIASH_PAR_BOWTIE2_SENSITIVITY_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2_sensitivity_level='&'#" ; else echo "# par_bowtie2_sensitivity_level="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_GZIPPED_READ_FILE+x} ]; then echo "${VIASH_PAR_STAR_GZIPPED_READ_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_gzipped_read_file='&'#" ; else echo "# par_star_gzipped_read_file="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_BZIPPED_READ_FILE+x} ]; then echo "${VIASH_PAR_STAR_BZIPPED_READ_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_bzipped_read_file='&'#" ; else echo "# par_star_bzipped_read_file="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_OUTPUT_GENOME_BAM+x} ]; then echo "${VIASH_PAR_STAR_OUTPUT_GENOME_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_output_genome_bam='&'#" ; else echo "# par_star_output_genome_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_TAG+x} ]; then echo "${VIASH_PAR_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tag='&'#" ; else echo "# par_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_MIN+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_MIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_min='&'#" ; else echo "# par_fragment_length_min="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_MAX+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_max='&'#" ; else echo "# par_fragment_length_max="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_MEAN+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_MEAN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_mean='&'#" ; else echo "# par_fragment_length_mean="; fi ) +$( if [ ! -z ${VIASH_PAR_GRAGMENT_LENGTH_SD+x} ]; then echo "${VIASH_PAR_GRAGMENT_LENGTH_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gragment_length_sd='&'#" ; else echo "# par_gragment_length_sd="; fi ) +$( if [ ! -z ${VIASH_PAR_ESTIMATE_RSPD+x} ]; then echo "${VIASH_PAR_ESTIMATE_RSPD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_estimate_rspd='&'#" ; else echo "# par_estimate_rspd="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_RSPD_BINS+x} ]; then echo "${VIASH_PAR_NUM_RSPD_BINS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_rspd_bins='&'#" ; else echo "# par_num_rspd_bins="; fi ) +$( if [ ! -z ${VIASH_PAR_GIBBS_BURNIN+x} ]; then echo "${VIASH_PAR_GIBBS_BURNIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gibbs_burnin='&'#" ; else echo "# par_gibbs_burnin="; fi ) +$( if [ ! -z ${VIASH_PAR_GIBBS_NUMBER_OF_SAMPLES+x} ]; then echo "${VIASH_PAR_GIBBS_NUMBER_OF_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gibbs_number_of_samples='&'#" ; else echo "# par_gibbs_number_of_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_GIBBS_SAMPLING_GAP+x} ]; then echo "${VIASH_PAR_GIBBS_SAMPLING_GAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gibbs_sampling_gap='&'#" ; else echo "# par_gibbs_sampling_gap="; fi ) +$( if [ ! -z ${VIASH_PAR_CI_CREDIBILITY_LEVEL+x} ]; then echo "${VIASH_PAR_CI_CREDIBILITY_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ci_credibility_level='&'#" ; else echo "# par_ci_credibility_level="; fi ) +$( if [ ! -z ${VIASH_PAR_CI_NUMBER_OF_SAMPLES_PER_COUNT_VECTOR+x} ]; then echo "${VIASH_PAR_CI_NUMBER_OF_SAMPLES_PER_COUNT_VECTOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ci_number_of_samples_per_count_vector='&'#" ; else echo "# par_ci_number_of_samples_per_count_vector="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_INTERMEDIATE_FILES+x} ]; then echo "${VIASH_PAR_KEEP_INTERMEDIATE_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_intermediate_files='&'#" ; else echo "# par_keep_intermediate_files="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMPORARY_FOLDER+x} ]; then echo "${VIASH_PAR_TEMPORARY_FOLDER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_temporary_folder='&'#" ; else echo "# par_temporary_folder="; fi ) +$( if [ ! -z ${VIASH_PAR_TIME+x} ]; then echo "${VIASH_PAR_TIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_time='&'#" ; else echo "# par_time="; fi ) +$( if [ ! -z ${VIASH_PAR_RUN_PRSEM+x} ]; then echo "${VIASH_PAR_RUN_PRSEM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_run_pRSEM='&'#" ; else echo "# par_run_pRSEM="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_PEAK_FILE+x} ]; then echo "${VIASH_PAR_CHIPSEQ_PEAK_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_peak_file='&'#" ; else echo "# par_chipseq_peak_file="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_TARGET_READ_FILES+x} ]; then echo "${VIASH_PAR_CHIPSEQ_TARGET_READ_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_target_read_files='&'#" ; else echo "# par_chipseq_target_read_files="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_CONTROL_READ_FILES+x} ]; then echo "${VIASH_PAR_CHIPSEQ_CONTROL_READ_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_control_read_files='&'#" ; else echo "# par_chipseq_control_read_files="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_READ_FILES_MULTI_TARGETS+x} ]; then echo "${VIASH_PAR_CHIPSEQ_READ_FILES_MULTI_TARGETS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_read_files_multi_targets='&'#" ; else echo "# par_chipseq_read_files_multi_targets="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_BED_FILES_MULTI_TARGETS+x} ]; then echo "${VIASH_PAR_CHIPSEQ_BED_FILES_MULTI_TARGETS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_bed_files_multi_targets='&'#" ; else echo "# par_chipseq_bed_files_multi_targets="; fi ) +$( if [ ! -z ${VIASH_PAR_CAP_STACKED_CHIPSEQ_READS+x} ]; then echo "${VIASH_PAR_CAP_STACKED_CHIPSEQ_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cap_stacked_chipseq_reads='&'#" ; else echo "# par_cap_stacked_chipseq_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_N_MAX_STACKED_CHIPSEQ_READS+x} ]; then echo "${VIASH_PAR_N_MAX_STACKED_CHIPSEQ_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_n_max_stacked_chipseq_reads='&'#" ; else echo "# par_n_max_stacked_chipseq_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_PARTITION_MODEL+x} ]; then echo "${VIASH_PAR_PARTITION_MODEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_partition_model='&'#" ; else echo "# par_partition_model="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + +if [ "\\$par_strandedness" == 'forward' ]; then + strandedness='--strandedness forward' +elif [ "\\$par_strandedness" == 'reverse' ]; then + strandedness="--strandedness reverse" +else + strandedness='' +fi + +IFS=";" read -ra input <<< \\$par_input + +INDEX=\\$(find -L \\$par_index -name "*.grp" | sed 's/\\\\.grp\\$//') + +unset_if_false=( par_paired par_quiet par_no_bam_output par_sampling_for_bam par_no_qualities + par_alignments par_bowtie2 par_star par_hisat2_hca par_append_names + par_single_cell_prior par_calc_pme par_calc_ci par_phred64_quals + par_solexa_quals par_star_gzipped_read_file par_star_bzipped_read_file + par_star_output_genome_bam par_estimate_rspd par_keep_intermediate_files + par_time par_run_pRSEM par_cap_stacked_chipseq_reads par_sort_bam_by_read_name par_sort_bam_by_coordinate ) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +rsem-calculate-expression \\\\ + \\${par_quiet:+-q} \\\\ + \\${par_no_bam_output:+--no-bam-output} \\\\ + \\${par_sampling_for_bam:+--sampling-for-bam} \\\\ + \\${par_no_qualities:+--no-qualities} \\\\ + \\${par_alignments:+--alignments} \\\\ + \\${par_bowtie2:+--bowtie2} \\\\ + \\${par_star:+--star} \\\\ + \\${par_hisat2_hca:+--hisat2-hca} \\\\ + \\${par_append_names:+--append-names} \\\\ + \\${par_single_cell_prior:+--single-cell-prior} \\\\ + \\${par_calc_pme:+--calc-pme} \\\\ + \\${par_calc_ci:+--calc-ci} \\\\ + \\${par_phred64_quals:+--phred64-quals} \\\\ + \\${par_solexa_quals:+--solexa-quals} \\\\ + \\${par_star_gzipped_read_file:+--star-gzipped-read-file} \\\\ + \\${par_star_bzipped_read_file:+--star-bzipped-read-file} \\\\ + \\${par_star_output_genome_bam:+--star-output-genome-bam} \\\\ + \\${par_estimate_rspd:+--estimate-rspd} \\\\ + \\${par_keep_intermediate_files:+--keep-intermediate-files} \\\\ + \\${par_time:+--time} \\\\ + \\${par_run_pRSEM:+--run-pRSEM} \\\\ + \\${par_cap_stacked_chipseq_reads:+--cap-stacked-chipseq-reads} \\\\ + \\${par_sort_bam_by_read_name:+--sort-bam-by-read-name} \\\\ + \\${par_sort_bam_by_coordinate:+--sort-bam-by-coordinate} \\\\ + \\${par_fai:+--fai "\\$par_fai"} \\\\ + \\${par_seed:+--seed "\\$par_seed"} \\\\ + \\${par_seed_length:+--seed-length "\\$par_seed_length"} \\\\ + \\${par_bowtie_n:+--bowtie-n "\\$par_bowtie_n"} \\\\ + \\${par_bowtie_e:+--bowtie-e "\\$par_bowtie_e"} \\\\ + \\${par_bowtie_m:+--bowtie-m "\\$par_bowtie_m"} \\\\ + \\${par_bowtie_chunkmbs:+--bowtie-chunkmbs "\\$par_bowtie_chunkmbs"} \\\\ + \\${par_bowtie2_mismatch_rate:+--bowtie2-mismatch-rate "\\$par_bowtie2_mismatch_rate"} \\\\ + \\${par_bowtie2_k:+--bowtie2-k "\\$par_bowtie2_k"} \\\\ + \\${par_bowtie2_sensitivity_level:+--bowtie2-sensitivity-level "\\$par_bowtie2_sensitivity_level"} \\\\ + \\${par_tag:+--tag "\\$par_tag"} \\\\ + \\${par_fragment_length_min:+--fragment-length-min "\\$par_fragment_length_min"} \\\\ + \\${par_fragment_length_max:+--fragment-length-max "\\$par_fragment_length_max"} \\\\ + \\${par_fragment_length_mean:+--fragment-length-mean "\\$par_fragment_length_mean"} \\\\ + \\${par_fragment_length_sd:+--fragment-length-sd "\\$par_fragment_length_sd"} \\\\ + \\${par_num_rspd_bins:+--num-rspd-bins "\\$par_num_rspd_bins"} \\\\ + \\${par_gibbs_burnin:+--gibbs-burnin "\\$par_gibbs_burnin"} \\\\ + \\${par_gibbs_number_of_samples:+--gibbs-number-of-samples "\\$par_gibbs_number_of_samples"} \\\\ + \\${par_gibbs_sampling_gap:+--gibbs-sampling-gap "\\$par_gibbs_sampling_gap"} \\\\ + \\${par_ci_credibility_level:+--ci-credibility-level "\\$par_ci_credibility_level"} \\\\ + \\${par_ci_number_of_samples_per_count_vector:+--ci-number-of-samples-per-count-vector "\\$par_ci_number_of_samples_per_count_vector"} \\\\ + \\${par_temporary_folder:+--temporary-folder "\\$par_temporary_folder"} \\\\ + \\${par_chipseq_peak_file:+--chipseq-peak-file "\\$par_chipseq_peak_file"} \\\\ + \\${par_chipseq_target_read_files:+--chipseq-target-read-files "\\$par_chipseq_target_read_files"} \\\\ + \\${par_chipseq_control_read_files:+--chipseq-control-read-files "\\$par_chipseq_control_read_files"} \\\\ + \\${par_chipseq_read_files_multi_targets:+--chipseq-read-files-multi-targets "\\$par_chipseq_read_files_multi_targets"} \\\\ + \\${par_chipseq_bed_files_multi_targets:+--chipseq-bed-files-multi-targets "\\$par_chipseq_bed_files_multi_targets"} \\\\ + \\${par_n_max_stacked_chipseq_reads:+--n-max-stacked-chipseq-reads "\\$par_n_max_stacked_chipseq_reads"} \\\\ + \\${par_partition_model:+--partition-model "\\$par_partition_model"} \\\\ + \\$strandedness \\\\ + \\${par_paired:+--paired-end} \\\\ + \\${input[*]} \\\\ + \\$INDEX \\\\ + \\$par_id + +[[ -f "\\${par_id}.genes.results" ]] && mv "\\${par_id}.genes.results" \\$par_counts_gene +[[ -f "\\${par_id}.isoforms.results" ]] && mv "\\${par_id}.isoforms.results" \\$par_counts_transcripts +[[ -d "\\${par_id}.stat" ]] && mv "\\${par_id}.stat" \\$par_stat +[[ -f "\\${par_id}.log" ]] && mv "\\${par_id}.log" \\$par_logs +[[ -f "\\${par_id}.STAR.genome.bam" ]] && mv "\\${par_id}.STAR.genome.bam" \\$par_bam_star +[[ -f "\\${par_id}.genome.bam" ]] && mv "\\${par_id}.genome.bam" \\$par_bam_genome +[[ -f "\\${par_id}.transcript.bam" ]] && mv "\\${par_id}.transcript.bam" \\$par_bam_transcript +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/rsem/rsem_calculate_expression", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow.config new file mode 100644 index 0000000..4ff4992 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'rsem/rsem_calculate_expression' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Calculate expression with RSEM. \n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json new file mode 100644 index 0000000..63db46d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json @@ -0,0 +1,839 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rsem_calculate_expression", +"description": "Calculate expression with RSEM. \n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`. Sample ID", + "help_text": "Type: `string`. Sample ID." + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity", + "help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity. Must be one of unstranded, forward, reverse", + "enum": ["forward", "reverse", "unstranded"] + + + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Paired-end reads or not?", + "help_text": "Type: `boolean_true`, default: `false`. Paired-end reads or not?" + , + "default":false + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification." + + } + + + , + "index": { + "type": + "string", + "description": "Type: `file`. RSEM index", + "help_text": "Type: `file`. RSEM index." + + } + + + , + "extra_args": { + "type": + "string", + "description": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples", + "help_text": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level", + "help_text": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level" + , + "default":"$id.$key.counts_gene.results" + } + + + , + "counts_transcripts": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level", + "help_text": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level" + , + "default":"$id.$key.counts_transcripts.results" + } + + + , + "stat": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics", + "help_text": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics" + , + "default":"$id.$key.stat.stat" + } + + + , + "logs": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs", + "help_text": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs" + , + "default":"$id.$key.logs.log" + } + + + , + "bam_star": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)", + "help_text": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)" + , + "default":"$id.$key.bam_star.bam" + } + + + , + "bam_genome": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)", + "help_text": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)" + , + "default":"$id.$key.bam_genome.bam" + } + + + , + "bam_transcript": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)", + "help_text": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)" + , + "default":"$id.$key.bam_transcript.bam" + } + + + , + "sort_bam_by_read_name": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordidate by read name", + "help_text": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordidate by read name. Setting this option on will produce \ndeterministic maximum likelihood estimations from independent runs. Note that sorting will take long \ntime and lots of memory.\n" + , + "default":false + } + + + , + "no_bam_output": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not output any BAM file", + "help_text": "Type: `boolean_true`, default: `false`. Do not output any BAM file." + , + "default":false + } + + + , + "sampling_for_bam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities", + "help_text": "Type: `boolean_true`, default: `false`. When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure \nincludes the alignment to the \"noise\" transcript, which does not appear in the BAM file. Only the \nsampled alignment has a weight of 1. All other alignments have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared in the BAM file should have weight 0.\n" + , + "default":false + } + + + , + "output_genome_bam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name.genome.bam\u0027, with alignments mapped to genomic coordinates and \nannotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM \npackage) to sort and index the bam file. \u0027sample_name.genome.sorted.bam\u0027 and \u0027sample_name.genome.sorted.bam.bai\u0027 \nwill be generated.\n" + , + "default":false + } + + + , + "sort_bam_by_coordinate": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices", + "help_text": "Type: `boolean_true`, default: `false`. Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices.\n" + , + "default":false + } + + +} +}, + + + "basic options" : { + "title": "Basic Options", + "type": "object", + "description": "No description", + "properties": { + + + "no_qualities": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores", + "help_text": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores." + , + "default":false + } + + + , + "alignments": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format", + "help_text": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format. The exact file format will be determined \nautomatically.\n" + , + "default":false + } + + + , + "fai": { + "type": + "string", + "description": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on", + "help_text": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on. \u003cfile\u003e is a FAI format file containing each reference sequence\u0027s \nname and length. Please refer to the SAM official website for the details of FAI format.\n" + + } + + + , + "bowtie2": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads", + "help_text": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM does not handle indel, local \nand discordant alignments, the Bowtie2 parameters are set in a way to avoid those alignments. In \nparticular, we use options \u0027--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1\u0027 \nby default. The last parameter of \u0027--score_min\u0027, \u0027-0.1\u0027, is the negative of maximum mismatch rate. \nThis rate can be set by option \u0027--bowtie2_mismatch_rate\u0027. If reads are paired-end, we additionally \nuse options \u0027--no_mixed\u0027 and \u0027--no_discordant\u0027.\n" + , + "default":false + } + + + , + "star": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use STAR to align reads", + "help_text": "Type: `boolean_true`, default: `false`. Use STAR to align reads. Alignment parameters are from ENCODE3\u0027s STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR\u0027s Output BAM file is unsorted. It is stored in RSEM\u0027s \ntemporary directory with name as \u0027sample_name.bam\u0027. Each STAR job will have its own private copy of \nthe genome in memory.\n" + , + "default":false + } + + + , + "hisat2_hca": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlast", + "help_text": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlast.\n" + , + "default":false + } + + + , + "append_names": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name.isoforms.results\u0027 and \u0027sample_name.genes.results\u0027.\n" + , + "default":false + } + + + , + "seed": { + "type": + "integer", + "description": "Type: `integer`. Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals", + "help_text": "Type: `integer`. Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals. The seed must be a non-negative 32 bit integer.\n" + + } + + + , + "single_cell_prior": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals", + "help_text": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals. However, much less genes are expressed in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1) as the prior which \nencourage the sparsity of the expression levels.\n" + , + "default":false + } + + + , + "calc_pme": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates", + "help_text": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates." + , + "default":false + } + + + , + "calc_ci": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates", + "help_text": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates. The credibility level can be \nchanged by setting \u0027--ci_credibility_level\u0027.\n" + , + "default":false + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Suppress the output of logging information", + "help_text": "Type: `boolean_true`, default: `false`. Suppress the output of logging information." + , + "default":false + } + + +} +}, + + + "aligner options" : { + "title": "Aligner Options", + "type": "object", + "description": "No description", + "properties": { + + + "seed_length": { + "type": + "integer", + "description": "Type: `integer`, example: `25`. Seed length used by the read aligner", + "help_text": "Type: `integer`, example: `25`. Seed length used by the read aligner. Providing the correct value is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie\u0027s seed length parameter. Any read with its or at least \none of its mates\u0027 (for paired-end reads) length less than this value will be ignored. If the \nreferences are not added poly(A) tails, the minimum allowed value is 5, otherwise, the minimum \nallowed value is 25. Note that this script will only check if the value \u003e= 5 and give a warning \nmessage if the value \u003c 25 but \u003e= 5. (Default: 25)\n" + + } + + + , + "phred64_quals": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver", + "help_text": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. \u003e= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n" + , + "default":false + } + + + , + "solexa_quals": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver", + "help_text": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver. \u003c 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n" + , + "default":false + } + + + , + "bowtie_n": { + "type": + "integer", + "description": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed", + "help_text": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3, Default: 2)\n", + "enum": [0, 1, 2, 3] + + + } + + + , + "bowtie_e": { + "type": + "integer", + "description": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment", + "help_text": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment. (Default: 99999999)\n" + + } + + + , + "bowtie_m": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist", + "help_text": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist. (Default: 200)\n" + + } + + + , + "bowtie_chunkmbs": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n", + "help_text": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n" + + } + + + , + "bowtie2_mismatch_rate": { + "type": + "number", + "description": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed", + "help_text": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed. (Default: 0.1)\n" + + } + + + , + "bowtie2_k": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. (Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read", + "help_text": "Type: `integer`, example: `200`. (Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read. (Default: 200)\n" + + } + + + , + "bowtie2_sensitivity_level": { + "type": + "string", + "description": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode", + "help_text": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode. This option controls how \nhard Bowtie 2 tries to find alignments. \u003cstring\u003e must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\". The four candidates correspond to Bowtie 2\u0027s \"--very-fast\", \"--fast\", \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\" - use Bowtie 2\u0027s default)\n", + "enum": ["very_fast", "fast", "sensitive", "very_sensitive"] + + + } + + + , + "star_gzipped_read_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip", + "help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip. (Default: false)\n" + , + "default":false + } + + + , + "star_bzipped_read_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2", + "help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2. (Default: false)\n" + , + "default":false + } + + + , + "star_output_genome_bam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name.STAR.genome.bam\u0027. \nThis file is NOT sorted by genomic coordinate. In this file, according to STAR\u0027s manual, \u0027paired \nends of an alignment are always adjacent, and multiple alignments of a read are adjacent as well\u0027. \n(Default: false)\n" + , + "default":false + } + + +} +}, + + + "advanced options" : { + "title": "Advanced Options", + "type": "object", + "description": "No description", + "properties": { + + + "tag": { + "type": + "string", + "description": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments", + "help_text": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments. The field should have the format \u003ctagName\u003e:i:\u003cvalue\u003e, where a \u003cvalue\u003e bigger than 0 \nindicates a read with too many alignments. (Default: \"\")\n" + + } + + + , + "fragment_length_min": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Minimum read/insert length allowed", + "help_text": "Type: `integer`, example: `1`. Minimum read/insert length allowed. This is also the value for the Bowtie/Bowtie2 -I option. \n(Default: 1)\n" + + } + + + , + "fragment_length_max": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. Maximum read/insert length allowed", + "help_text": "Type: `integer`, example: `1000`. Maximum read/insert length allowed. This is also the value for the Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n" + + } + + + , + "fragment_length_mean": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. (single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian", + "help_text": "Type: `integer`, example: `-1`. (single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian. (Default: -1, which disables use of the fragment length distribution)\n" + + } + + + , + "gragment_length_sd": { + "type": + "number", + "description": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian", + "help_text": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes that all fragments are of the same length, \ngiven by the rounded value of --fragment_length_mean).\n" + + } + + + , + "estimate_rspd": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data", + "help_text": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n" + , + "default":false + } + + + , + "num_rspd_bins": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. Number of bins in the RSPD", + "help_text": "Type: `integer`, example: `20`. Number of bins in the RSPD. Only relevant when \u0027--estimate_rspd\u0027 is specified. Use of the default \nsetting is recommended. (Default: 20)\n" + + } + + + , + "gibbs_burnin": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler", + "help_text": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler. Each round passes over the entire data set \nonce. If RSEM can use multiple threads, multiple Gibbs samplers will start at the same time and all \nsamplers share the same burn-in number. (Default: 200)\n" + + } + + + , + "gibbs_number_of_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers", + "help_text": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers. (Default: 1000)\n" + + } + + + , + "gibbs_sampling_gap": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. The number of rounds between two succinct count vectors RSEM collects", + "help_text": "Type: `integer`, example: `1`. The number of rounds between two succinct count vectors RSEM collects. If the count vector after \nround N is collected, the count vector after round N + \u003cint\u003e will also be collected. (Default: 1)\n" + + } + + + , + "ci_credibility_level": { + "type": + "number", + "description": "Type: `double`, example: `0.95`. The credibility level for credibility intervals", + "help_text": "Type: `double`, example: `0.95`. The credibility level for credibility intervals. (Default: 0.95)\n" + + } + + + , + "ci_number_of_samples_per_count_vector": { + "type": + "integer", + "description": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector", + "help_text": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector. The crebility \nintervals are calculated by first sampling P(C | D) and then sampling P(Theta | C) for each sampled \ncount vector. This option controls how many Theta vectors are sampled per sampled count vector. \n(Default: 50)\n" + + } + + + , + "keep_intermediate_files": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM", + "help_text": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM. RSEM creates a temporary directory, \u0027sample_name.temp\u0027, \ninto which it puts all intermediate output files. If this directory already exists, RSEM overwrites \nall files generated by previous RSEM runs inside of it. By default, after RSEM finishes, the \ntemporary directory is deleted. Set this option to prevent the deletion of this directory and the \nintermediate files inside of it.\n" + , + "default":false + } + + + , + "temporary_folder": { + "type": + "string", + "description": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM", + "help_text": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM. If the folder specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n" + + } + + + , + "time": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name.time\u0027.\n" + , + "default":false + } + + +} +}, + + + "prior-enhanced rsem options" : { + "title": "Prior-Enhanced RSEM Options", + "type": "object", + "description": "No description", + "properties": { + + + "run_pRSEM": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM)", + "help_text": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform\u0027s initial pseudo-count for \nRSEM\u0027s Gibbs sampling, will be learned from input RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq peak information to partition isoforms (e.g. in pRSEM\u0027s default \npartition model), either ChIP-seq peak file (with the \u0027--chipseq_peak_file\u0027 option) or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables are required (with the \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027, \u0027--chipseq_control_read_files \u003cstring\u003e\u0027, and \u0027--bowtie_path \n\u003cpath\u003e options), otherwise, ChIP-seq FASTQ files for target and control and the path to Bowtie \nexecutables are required.\n" + , + "default":false + } + + + , + "chipseq_peak_file": { + "type": + "string", + "description": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i", + "help_text": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i.e. BED6+4, format. This file is used \nwhen running prior-enhanced RSEM in the default two-partition model. It partitions isoforms by \nwhether they have ChIP-seq overlapping with their transcription start site region or not. Each \npartition will have its own prior parameter learned from a training set. This file can be either \ngzipped or ungzipped.\n" + + } + + + , + "chipseq_target_read_files": { + "type": + "string", + "description": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target", + "help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target. This option is used when running \nprior-enhanced RSEM. It provides information to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 \nand \u0027--chipseq_control_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n" + + } + + + , + "chipseq_control_read_files": { + "type": + "string", + "description": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq conrol", + "help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq conrol. This option is used when running \nprior-enhanced RSEM. It provides information to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 and \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n" + + } + + + , + "chipseq_read_files_multi_targets": { + "type": + "string", + "description": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets", + "help_text": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets. This option is used when \nrunning prior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides \ninformation to calculate ChIP-seq signals. All files can be either ungzipped or gzipped with a suffix \n\u0027.gz\u0027 or \u0027.gzip\u0027. When this option is specified, the option \u0027--bowtie_path \u003cpath\u003e\u0027 must be defined and \nthe option \u0027--partition_model \u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n" + + } + + + , + "chipseq_bed_files_multi_targets": { + "type": + "string", + "description": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets", + "help_text": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets. This option is used when running \nprior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides information \nof ChIP-seq signals and must have at least the first six BED columns. All files can be either ungzipped \nor gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. When this option is specified, the option \u0027--partition_model \n\u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n" + + } + + + , + "cap_stacked_chipseq_reads": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval", + "help_text": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval. This option is used \nwhen running prior-enhanced RSEM, where prior is learned from multiple complementary data sets. This \noption is only in use when either \u0027--chipseq_read_files_multi_targets \u003cstring\u003e\u0027 or \n\u0027--chipseq_bed_files_multi_targets \u003cstring\u003e\u0027 is specified.\n" + , + "default":false + } + + + , + "n_max_stacked_chipseq_reads": { + "type": + "integer", + "description": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep", + "help_text": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep. This option is used when running prior-enhanced \nRSEM, where prior is learned from multiple complementary data sets. This option is only in use when the \noption \u0027--cap_stacked_chipseq_reads\u0027 is set.\n" + + } + + + , + "partition_model": { + "type": + "string", + "description": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM", + "help_text": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3, lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk, pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above models are learned from a training set. For detailed explanations, please \nsee prior-enhanced RSEM\u0027s paper. (Default: \u0027pk\u0027)\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/basic options" + }, + + { + "$ref": "#/definitions/aligner options" + }, + + { + "$ref": "#/definitions/advanced options" + }, + + { + "$ref": "#/definitions/prior-enhanced rsem options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml new file mode 100644 index 0000000..d351a37 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml @@ -0,0 +1,457 @@ +name: "rsem_prepare_reference" +namespace: "rsem" +version: "v0.3.1" +authors: +- name: "Sai Nirmayi Yasa" + roles: + - "author" + - "maintainer" + info: + links: + email: "nirmayi@data-intuitive.com" + github: "sainirmayi" + linkedin: "sai-nirmayi-yasa" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Junior Bioinformatics Researcher" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--reference_fasta_files" + description: "Semi-colon separated list of Multi-FASTA formatted files OR a directory\ + \ name. If a directory name is specified, RSEM will read all files with suffix\ + \ \".fa\" or \".fasta\" in this directory. The files should contain either the\ + \ sequences of transcripts or an entire genome, depending on whether the '--gtf'\ + \ option is used.\n" + info: null + example: + - "read1.fasta" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--reference_name" + description: "The name of the reference used. RSEM will generate several reference-related\ + \ files that are prefixed by this name. This name can contain path information\ + \ (e.g. '/ref/mm9').\n" + info: null + example: + - "/ref/mm9" + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Directory containing reference files generated by RSEM." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Other options" + arguments: + - type: "file" + name: "--gtf" + description: "Assume that 'reference_fasta_files' contains the sequence of a genome,\ + \ and extract transcript reference sequences using the gene annotations specified\ + \ in the GTF file. If this and '--gff3' options are not provided, RSEM will\ + \ assume 'reference_fasta_files' contains the reference transcripts. In this\ + \ case, RSEM assumes that name of each sequence in the Multi-FASTA files is\ + \ its transcript_id." + info: null + example: + - "annotations.gtf" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gff3" + description: "GFF3 annotation file. Converted to GTF format with the file name\ + \ 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does not\ + \ exist." + info: null + example: + - "annotations.gff" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gff3_rna_patterns" + description: "List of transcript categories (separated by semi-colon). Only transcripts\ + \ that match the string will be extracted." + info: null + example: + - "mRNA;rRNA" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--gff3_genes_as_transcripts" + description: "This option is designed for untypical organisms, such as viruses,\ + \ whose GFF3 files only contain genes. RSEM will assume each gene as a unique\ + \ transcript when it converts the GFF3 file into GTF format." + info: null + direction: "input" + - type: "string" + name: "--trusted_sources" + description: "List of trusted sources (separated by semi-colon). Only transcripts\ + \ coming from these sources will be extracted. If this option is off, all sources\ + \ are accepted." + info: null + example: + - "ENSEMBL;HAVANA" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--transcript_to_gene_map" + description: "Use information from this file to map from transcript (isoform)\ + \ ids to gene ids. Each line of this file should be of the form: \n gene_id\ + \ transcript_id\nwith the two fields separated by a tab character.\nIf you are\ + \ using a GTF file for the \"UCSC Genes\" gene set from the UCSC Genome Browser,\ + \ then the \"knownIsoforms.txt\" file (obtained from the \"Downloads\" section\ + \ of the UCSC Genome Browser site) is of this format. \nIf this option is off,\ + \ then the mapping of isoforms to genes depends on whether the '--gtf' option\ + \ is specified. If '--gtf' is specified, then RSEM uses the \"gene_id\" and\ + \ \"transcript_id\" attributes in the GTF file. Otherwise, RSEM assumes that\ + \ each sequence in the reference sequence files is a separate gene.\n" + info: null + example: + - "isoforms.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--allele_to_gene_map" + description: "Use information from to provide gene_id and transcript_id\ + \ information for each allele-specific transcript. Each line of should\ + \ be of the form:\n gene_id transcript_id allele_id\nwith the fields separated\ + \ by a tab character.\nThis option is designed for quantifying allele-specific\ + \ expression. It is only valid if '--gtf' option is not specified. allele_id\ + \ should be the sequence names presented in the Multi-FASTA-formatted files.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--polyA" + description: "Add poly(A) tails to the end of all reference isoforms. The length\ + \ of poly(A) tail added is specified by '--polyA-length' option. STAR aligner\ + \ users may not want to use this option." + info: null + direction: "input" + - type: "integer" + name: "--polyA_length" + description: "The length of the poly(A) tails to be added." + info: null + example: + - 125 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--no_polyA_subset" + description: "Only meaningful if '--polyA' is specified. Do not add poly(A) tails\ + \ to those transcripts listed in this file containing a list of transcript_ids." + info: null + example: + - "transcript_ids.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bowtie" + description: "Build Bowtie indices." + info: null + direction: "input" + - type: "boolean_true" + name: "--bowtie2" + description: "Build Bowtie 2 indices." + info: null + direction: "input" + - type: "boolean_true" + name: "--star" + description: "Build STAR indices." + info: null + direction: "input" + - type: "integer" + name: "--star_sjdboverhang" + description: "Length of the genomic sequence around annotated junction. It is\ + \ only used for STAR to build splice junctions database and not needed for Bowtie\ + \ or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According\ + \ to STAR's manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end\ + \ reads, the ideal value is 101-1=100. In most cases, the default value of 100\ + \ will work as well as the ideal value. (Default is 100)" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--hisat2_hca" + description: "Build HISAT2 indices on the transcriptome according to Human Cell\ + \ Atlas (HCA) SMART-Seq2 pipeline." + info: null + direction: "input" + - type: "boolean_true" + name: "--quiet" + alternatives: + - "-q" + description: "Suppress the output of logging information." + info: null + direction: "input" +- name: "Prior-enhanced RSEM options" + arguments: + - type: "boolean_true" + name: "--prep_pRSEM" + description: "A Boolean indicating whether to prepare reference files for pRSEM,\ + \ including building Bowtie indices for a genome and selecting training set\ + \ isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced\ + \ RSEM and the training set isoforms will be used for learning prior. A path\ + \ to Bowtie executables and a mappability file in bigWig format are required\ + \ when this option is on. Currently, Bowtie2 is not supported for prior-enhanced\ + \ RSEM." + info: null + direction: "input" + - type: "file" + name: "--mappability_bigwig_file" + description: "Full path to a whole-genome mappability file in bigWig format. This\ + \ file is required for running prior-enhanced RSEM. It is used for selecting\ + \ a training set of isoforms for prior-learning. This file can be either downloaded\ + \ from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One)." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "RSEM is a software package for estimating gene and isoform expression\ + \ levels from RNA-Seq data. This component prepares transcript references for RSEM.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "Transcriptome" +- "Index" +license: "GPL-3.0" +references: + doi: + - "10.1186/1471-2105-12-323" +links: + repository: "https://github.com/deweylab/RSEM" + homepage: "http://deweylab.github.io/RSEM" + documentation: "https://deweylab.github.io/RSEM/rsem-prepare-reference.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "build-essential" + - "gcc" + - "g++" + - "make" + - "wget" + - "zlib1g-dev" + - "unzip xxd" + - "perl" + - "r-base" + - "bowtie2" + - "pip" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "bowtie" + upgrade: true + - type: "docker" + run: + - "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone\ + \ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\nunzip ${STAR_VERSION}.zip && \\\ncd STAR-${STAR_VERSION}/source &&\ + \ \\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\ncp STAR /usr/local/bin\ + \ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip\ + \ && \\\nunzip v${RSEM_VERSION}.zip && \\\ncd RSEM-${RSEM_VERSION} && \\\nmake\ + \ && \\\nmake install && \\\ncd /tmp && \\\nwget --no-check-certificate -O bowtie-${BOWTIE_VERSION}-linux-x86_64.zip\ + \ https://sourceforge.net/projects/bowtie-bio/files/bowtie/${BOWTIE_VERSION}/bowtie-${BOWTIE_VERSION}-linux-x86_64.zip/download\ + \ && \\\nunzip bowtie-${BOWTIE_VERSION}-linux-x86_64.zip && \\\ncp bowtie-${BOWTIE_VERSION}-linux-x86_64/bowtie*\ + \ /usr/local/bin && \\\ncd /tmp && \\\ngit clone https://github.com/DaehwanKimLab/hisat2.git\ + \ /tmp/hisat2 && \\\ncd /tmp/hisat2 && \\\nmake && \\\ncp -r hisat2* /usr/local/bin\ + \ && \\\ncd && \\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ /tmp/bowtie-${BOWTIE_VERSION}-linux-x86_64 /tmp/hisat2 && \\\napt-get --purge\ + \ autoremove -y ${PACKAGES} && \\\napt-get clean \n" + env: + - "STAR_VERSION=2.7.11b" + - "RSEM_VERSION=1.3.3" + - "BOWTIE_VERSION=1.3.1" + - "TZ=Europe/Brussels" + - type: "docker" + run: + - "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\ + \ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\ + \ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\ + \ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\ + \ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\ + d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\ + \ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rsem/rsem_prepare_reference/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rsem/rsem_prepare_reference" + executable: "target/nextflow/rsem/rsem_prepare_reference/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/main.nf new file mode 100644 index 0000000..f87d205 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/main.nf @@ -0,0 +1,4106 @@ +// rsem_prepare_reference v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Sai Nirmayi Yasa (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rsem_prepare_reference", + "namespace" : "rsem", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Sai Nirmayi Yasa", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "nirmayi@data-intuitive.com", + "github" : "sainirmayi", + "linkedin" : "sai-nirmayi-yasa" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Junior Bioinformatics Researcher" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--reference_fasta_files", + "description" : "Semi-colon separated list of Multi-FASTA formatted files OR a directory name. If a directory name is specified, RSEM will read all files with suffix \\".fa\\" or \\".fasta\\" in this directory. The files should contain either the sequences of transcripts or an entire genome, depending on whether the '--gtf' option is used.\n", + "example" : [ + "read1.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--reference_name", + "description" : "The name of the reference used. RSEM will generate several reference-related files that are prefixed by this name. This name can contain path information (e.g. '/ref/mm9').\n", + "example" : [ + "/ref/mm9" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Directory containing reference files generated by RSEM.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Other options", + "arguments" : [ + { + "type" : "file", + "name" : "--gtf", + "description" : "Assume that 'reference_fasta_files' contains the sequence of a genome, and extract transcript reference sequences using the gene annotations specified in the GTF file. If this and '--gff3' options are not provided, RSEM will assume 'reference_fasta_files' contains the reference transcripts. In this case, RSEM assumes that name of each sequence in the Multi-FASTA files is its transcript_id.", + "example" : [ + "annotations.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gff3", + "description" : "GFF3 annotation file. Converted to GTF format with the file name 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does not exist.", + "example" : [ + "annotations.gff" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gff3_rna_patterns", + "description" : "List of transcript categories (separated by semi-colon). Only transcripts that match the string will be extracted.", + "example" : [ + "mRNA;rRNA" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--gff3_genes_as_transcripts", + "description" : "This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes. RSEM will assume each gene as a unique transcript when it converts the GFF3 file into GTF format.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--trusted_sources", + "description" : "List of trusted sources (separated by semi-colon). Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted.", + "example" : [ + "ENSEMBL;HAVANA" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcript_to_gene_map", + "description" : "Use information from this file to map from transcript (isoform) ids to gene ids. Each line of this file should be of the form: \n gene_id transcript_id\nwith the two fields separated by a tab character.\nIf you are using a GTF file for the \\"UCSC Genes\\" gene set from the UCSC Genome Browser, then the \\"knownIsoforms.txt\\" file (obtained from the \\"Downloads\\" section of the UCSC Genome Browser site) is of this format. \nIf this option is off, then the mapping of isoforms to genes depends on whether the '--gtf' option is specified. If '--gtf' is specified, then RSEM uses the \\"gene_id\\" and \\"transcript_id\\" attributes in the GTF file. Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene.\n", + "example" : [ + "isoforms.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--allele_to_gene_map", + "description" : "Use information from to provide gene_id and transcript_id information for each allele-specific transcript. Each line of should be of the form:\n gene_id transcript_id allele_id\nwith the fields separated by a tab character.\nThis option is designed for quantifying allele-specific expression. It is only valid if '--gtf' option is not specified. allele_id should be the sequence names presented in the Multi-FASTA-formatted files.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--polyA", + "description" : "Add poly(A) tails to the end of all reference isoforms. The length of poly(A) tail added is specified by '--polyA-length' option. STAR aligner users may not want to use this option.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--polyA_length", + "description" : "The length of the poly(A) tails to be added.", + "example" : [ + 125 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--no_polyA_subset", + "description" : "Only meaningful if '--polyA' is specified. Do not add poly(A) tails to those transcripts listed in this file containing a list of transcript_ids.", + "example" : [ + "transcript_ids.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--bowtie", + "description" : "Build Bowtie indices.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--bowtie2", + "description" : "Build Bowtie 2 indices.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--star", + "description" : "Build STAR indices.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--star_sjdboverhang", + "description" : "Length of the genomic sequence around annotated junction. It is only used for STAR to build splice junctions database and not needed for Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According to STAR's manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. In most cases, the default value of 100 will work as well as the ideal value. (Default is 100)", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--hisat2_hca", + "description" : "Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "alternatives" : [ + "-q" + ], + "description" : "Suppress the output of logging information.", + "direction" : "input" + } + ] + }, + { + "name" : "Prior-enhanced RSEM options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--prep_pRSEM", + "description" : "A Boolean indicating whether to prepare reference files for pRSEM, including building Bowtie indices for a genome and selecting training set isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced RSEM and the training set isoforms will be used for learning prior. A path to Bowtie executables and a mappability file in bigWig format are required when this option is on. Currently, Bowtie2 is not supported for prior-enhanced RSEM.", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--mappability_bigwig_file", + "description" : "Full path to a whole-genome mappability file in bigWig format. This file is required for running prior-enhanced RSEM. It is used for selecting a training set of isoforms for prior-learning. This file can be either downloaded from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One).", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "RSEM is a software package for estimating gene and isoform expression levels from RNA-Seq data. This component prepares transcript references for RSEM.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Transcriptome", + "Index" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1186/1471-2105-12-323" + ] + }, + "links" : { + "repository" : "https://github.com/deweylab/RSEM", + "homepage" : "http://deweylab.github.io/RSEM", + "documentation" : "https://deweylab.github.io/RSEM/rsem-prepare-reference.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "build-essential", + "gcc", + "g++", + "make", + "wget", + "zlib1g-dev", + "unzip xxd", + "perl", + "r-base", + "bowtie2", + "pip", + "git" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "bowtie" + ], + "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\nunzip ${STAR_VERSION}.zip && \\\\\ncd STAR-${STAR_VERSION}/source && \\\\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\ncp STAR /usr/local/bin && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \\\\\nunzip v${RSEM_VERSION}.zip && \\\\\ncd RSEM-${RSEM_VERSION} && \\\\\nmake && \\\\\nmake install && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate -O bowtie-${BOWTIE_VERSION}-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/${BOWTIE_VERSION}/bowtie-${BOWTIE_VERSION}-linux-x86_64.zip/download && \\\\\nunzip bowtie-${BOWTIE_VERSION}-linux-x86_64.zip && \\\\\ncp bowtie-${BOWTIE_VERSION}-linux-x86_64/bowtie* /usr/local/bin && \\\\\ncd /tmp && \\\\\ngit clone https://github.com/DaehwanKimLab/hisat2.git /tmp/hisat2 && \\\\\ncd /tmp/hisat2 && \\\\\nmake && \\\\\ncp -r hisat2* /usr/local/bin && \\\\\ncd && \\\\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip /tmp/bowtie-${BOWTIE_VERSION}-linux-x86_64 /tmp/hisat2 && \\\\\napt-get --purge autoremove -y ${PACKAGES} && \\\\\napt-get clean \n" + ], + "env" : [ + "STAR_VERSION=2.7.11b", + "RSEM_VERSION=1.3.3", + "BOWTIE_VERSION=1.3.1", + "TZ=Europe/Brussels" + ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g'`\\" > /var/software_versions.txt && \\\\\necho \\"STAR: `STAR --version`\\" >> /var/software_versions.txt && \\\\\necho \\"bowtie2: `bowtie2 --version | grep -oP '\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt && \\\\\necho \\"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\\\K\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt && \\\\\necho \\"HISAT2: `hisat2 --version | grep -oP 'hisat2-align-s version \\\\K\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rsem/rsem_prepare_reference/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rsem/rsem_prepare_reference", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_REFERENCE_FASTA_FILES+x} ]; then echo "${VIASH_PAR_REFERENCE_FASTA_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reference_fasta_files='&'#" ; else echo "# par_reference_fasta_files="; fi ) +$( if [ ! -z ${VIASH_PAR_REFERENCE_NAME+x} ]; then echo "${VIASH_PAR_REFERENCE_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reference_name='&'#" ; else echo "# par_reference_name="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_GFF3+x} ]; then echo "${VIASH_PAR_GFF3}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gff3='&'#" ; else echo "# par_gff3="; fi ) +$( if [ ! -z ${VIASH_PAR_GFF3_RNA_PATTERNS+x} ]; then echo "${VIASH_PAR_GFF3_RNA_PATTERNS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gff3_rna_patterns='&'#" ; else echo "# par_gff3_rna_patterns="; fi ) +$( if [ ! -z ${VIASH_PAR_GFF3_GENES_AS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_GFF3_GENES_AS_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gff3_genes_as_transcripts='&'#" ; else echo "# par_gff3_genes_as_transcripts="; fi ) +$( if [ ! -z ${VIASH_PAR_TRUSTED_SOURCES+x} ]; then echo "${VIASH_PAR_TRUSTED_SOURCES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trusted_sources='&'#" ; else echo "# par_trusted_sources="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPT_TO_GENE_MAP+x} ]; then echo "${VIASH_PAR_TRANSCRIPT_TO_GENE_MAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcript_to_gene_map='&'#" ; else echo "# par_transcript_to_gene_map="; fi ) +$( if [ ! -z ${VIASH_PAR_ALLELE_TO_GENE_MAP+x} ]; then echo "${VIASH_PAR_ALLELE_TO_GENE_MAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_allele_to_gene_map='&'#" ; else echo "# par_allele_to_gene_map="; fi ) +$( if [ ! -z ${VIASH_PAR_POLYA+x} ]; then echo "${VIASH_PAR_POLYA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_polyA='&'#" ; else echo "# par_polyA="; fi ) +$( if [ ! -z ${VIASH_PAR_POLYA_LENGTH+x} ]; then echo "${VIASH_PAR_POLYA_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_polyA_length='&'#" ; else echo "# par_polyA_length="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_POLYA_SUBSET+x} ]; then echo "${VIASH_PAR_NO_POLYA_SUBSET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_polyA_subset='&'#" ; else echo "# par_no_polyA_subset="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE+x} ]; then echo "${VIASH_PAR_BOWTIE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie='&'#" ; else echo "# par_bowtie="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2+x} ]; then echo "${VIASH_PAR_BOWTIE2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2='&'#" ; else echo "# par_bowtie2="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR+x} ]; then echo "${VIASH_PAR_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star='&'#" ; else echo "# par_star="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_SJDBOVERHANG+x} ]; then echo "${VIASH_PAR_STAR_SJDBOVERHANG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_sjdboverhang='&'#" ; else echo "# par_star_sjdboverhang="; fi ) +$( if [ ! -z ${VIASH_PAR_HISAT2_HCA+x} ]; then echo "${VIASH_PAR_HISAT2_HCA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hisat2_hca='&'#" ; else echo "# par_hisat2_hca="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_PREP_PRSEM+x} ]; then echo "${VIASH_PAR_PREP_PRSEM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_prep_pRSEM='&'#" ; else echo "# par_prep_pRSEM="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPPABILITY_BIGWIG_FILE+x} ]; then echo "${VIASH_PAR_MAPPABILITY_BIGWIG_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mappability_bigwig_file='&'#" ; else echo "# par_mappability_bigwig_file="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +unset_if_false=( par_gff3_genes_as_transcripts par_polyA par_bowtie par_bowtie2 par_star par_hisat2_hca par_quiet par_prep_pRSEM ) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# replace ';' with ',' +par_reference_fasta_files=\\$(echo \\$par_reference_fasta_files | tr ';' ',') +par_gff3_rna_patterns=\\$(echo \\$par_gff3_rna_patterns | tr ';' ',') +par_trusted_sources=\\$(echo \\$par_trusted_sources | tr ';' ',') + +echo "\\$par_reference_fasta_files" +rsem-prepare-reference \\\\ + \\${par_gtf:+--gtf "\\${par_gtf}"} \\\\ + \\${par_gff3:+--gff3 "\\${par_gff3}"} \\\\ + \\${par_gff3_rna_patterns:+--gff3-RNA-patterns "\\${par_gff3_rna_patterns}"} \\\\ + \\${par_gff3_genes_as_transcripts:+--gff3-genes-as-transcripts "\\${par_gff3_genes_as_transcripts}"} \\\\ + \\${par_trusted_sources:+--trusted-sources "\\${par_trusted_sources}"} \\\\ + \\${par_transcript_to_gene_map:+--transcript-to-gene-map "\\${par_transcript_to_gene_map}"} \\\\ + \\${par_allele_to_gene_map:+--allele-to-gene-map "\\${par_allele_to_gene_map}"} \\\\ + \\${par_polyA:+--polyA} \\\\ + \\${par_polyA_length:+--polyA-length "\\${par_polyA_length}"} \\\\ + \\${par_no_polyA_subset:+--no-polyA-subset "\\${par_no_polyA_subset}"} \\\\ + \\${par_bowtie:+--bowtie} \\\\ + \\${par_bowtie2:+--bowtie2} \\\\ + \\${par_star:+--star} \\\\ + \\${par_star_sjdboverhang:+--star-sjdboverhang "\\${par_star_sjdboverhang}"} \\\\ + \\${par_hisat2_hca:+--hisat2-hca} \\\\ + \\${par_quiet:+--quiet} \\\\ + \\${par_prep_pRSEM:+--prep-pRSEM} \\\\ + \\${par_mappability_bigwig_file:+--mappability-bigwig-file "\\${par_mappability_bigwig_file}"} \\\\ + \\${meta_cpus:+--num-threads "\\${meta_cpus}"} \\\\ + "\\${par_reference_fasta_files}" \\\\ + "\\${par_reference_name}" + +mkdir -p "\\${par_output}" +mv \\${par_reference_name}.* "\\${par_output}/" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/rsem/rsem_prepare_reference", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow.config new file mode 100644 index 0000000..72abace --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'rsem/rsem_prepare_reference' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'RSEM is a software package for estimating gene and isoform expression levels from RNA-Seq data. This component prepares transcript references for RSEM.\n' + author = 'Sai Nirmayi Yasa' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json new file mode 100644 index 0000000..1d5c516 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json @@ -0,0 +1,321 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rsem_prepare_reference", +"description": "RSEM is a software package for estimating gene and isoform expression levels from RNA-Seq data. This component prepares transcript references for RSEM.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "reference_fasta_files": { + "type": + "string", + "description": "Type: List of `file`, required, example: `read1.fasta`, multiple_sep: `\";\"`. Semi-colon separated list of Multi-FASTA formatted files OR a directory name", + "help_text": "Type: List of `file`, required, example: `read1.fasta`, multiple_sep: `\";\"`. Semi-colon separated list of Multi-FASTA formatted files OR a directory name. If a directory name is specified, RSEM will read all files with suffix \".fa\" or \".fasta\" in this directory. The files should contain either the sequences of transcripts or an entire genome, depending on whether the \u0027--gtf\u0027 option is used.\n" + + } + + + , + "reference_name": { + "type": + "string", + "description": "Type: `string`, required, example: `/ref/mm9`. The name of the reference used", + "help_text": "Type: `string`, required, example: `/ref/mm9`. The name of the reference used. RSEM will generate several reference-related files that are prefixed by this name. This name can contain path information (e.g. \u0027/ref/mm9\u0027).\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output`. Directory containing reference files generated by RSEM", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Directory containing reference files generated by RSEM." + , + "default":"$id.$key.output" + } + + +} +}, + + + "other options" : { + "title": "Other options", + "type": "object", + "description": "No description", + "properties": { + + + "gtf": { + "type": + "string", + "description": "Type: `file`, example: `annotations.gtf`. Assume that \u0027reference_fasta_files\u0027 contains the sequence of a genome, and extract transcript reference sequences using the gene annotations specified in the GTF file", + "help_text": "Type: `file`, example: `annotations.gtf`. Assume that \u0027reference_fasta_files\u0027 contains the sequence of a genome, and extract transcript reference sequences using the gene annotations specified in the GTF file. If this and \u0027--gff3\u0027 options are not provided, RSEM will assume \u0027reference_fasta_files\u0027 contains the reference transcripts. In this case, RSEM assumes that name of each sequence in the Multi-FASTA files is its transcript_id." + + } + + + , + "gff3": { + "type": + "string", + "description": "Type: `file`, example: `annotations.gff`. GFF3 annotation file", + "help_text": "Type: `file`, example: `annotations.gff`. GFF3 annotation file. Converted to GTF format with the file name \u0027reference_name.gtf\u0027. Please make sure that \u0027reference_name.gtf\u0027 does not exist." + + } + + + , + "gff3_rna_patterns": { + "type": + "string", + "description": "Type: List of `string`, example: `mRNA;rRNA`, multiple_sep: `\";\"`. List of transcript categories (separated by semi-colon)", + "help_text": "Type: List of `string`, example: `mRNA;rRNA`, multiple_sep: `\";\"`. List of transcript categories (separated by semi-colon). Only transcripts that match the string will be extracted." + + } + + + , + "gff3_genes_as_transcripts": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes", + "help_text": "Type: `boolean_true`, default: `false`. This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes. RSEM will assume each gene as a unique transcript when it converts the GFF3 file into GTF format." + , + "default":false + } + + + , + "trusted_sources": { + "type": + "string", + "description": "Type: List of `string`, example: `ENSEMBL;HAVANA`, multiple_sep: `\";\"`. List of trusted sources (separated by semi-colon)", + "help_text": "Type: List of `string`, example: `ENSEMBL;HAVANA`, multiple_sep: `\";\"`. List of trusted sources (separated by semi-colon). Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted." + + } + + + , + "transcript_to_gene_map": { + "type": + "string", + "description": "Type: `file`, example: `isoforms.txt`. Use information from this file to map from transcript (isoform) ids to gene ids", + "help_text": "Type: `file`, example: `isoforms.txt`. Use information from this file to map from transcript (isoform) ids to gene ids. Each line of this file should be of the form: \n gene_id transcript_id\nwith the two fields separated by a tab character.\nIf you are using a GTF file for the \"UCSC Genes\" gene set from the UCSC Genome Browser, then the \"knownIsoforms.txt\" file (obtained from the \"Downloads\" section of the UCSC Genome Browser site) is of this format. \nIf this option is off, then the mapping of isoforms to genes depends on whether the \u0027--gtf\u0027 option is specified. If \u0027--gtf\u0027 is specified, then RSEM uses the \"gene_id\" and \"transcript_id\" attributes in the GTF file. Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene.\n" + + } + + + , + "allele_to_gene_map": { + "type": + "string", + "description": "Type: `file`. Use information from \u003cfile\u003e to provide gene_id and transcript_id information for each allele-specific transcript", + "help_text": "Type: `file`. Use information from \u003cfile\u003e to provide gene_id and transcript_id information for each allele-specific transcript. Each line of \u003cfile\u003e should be of the form:\n gene_id transcript_id allele_id\nwith the fields separated by a tab character.\nThis option is designed for quantifying allele-specific expression. It is only valid if \u0027--gtf\u0027 option is not specified. allele_id should be the sequence names presented in the Multi-FASTA-formatted files.\n" + + } + + + , + "polyA": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add poly(A) tails to the end of all reference isoforms", + "help_text": "Type: `boolean_true`, default: `false`. Add poly(A) tails to the end of all reference isoforms. The length of poly(A) tail added is specified by \u0027--polyA-length\u0027 option. STAR aligner users may not want to use this option." + , + "default":false + } + + + , + "polyA_length": { + "type": + "integer", + "description": "Type: `integer`, example: `125`. The length of the poly(A) tails to be added", + "help_text": "Type: `integer`, example: `125`. The length of the poly(A) tails to be added." + + } + + + , + "no_polyA_subset": { + "type": + "string", + "description": "Type: `file`, example: `transcript_ids.txt`. Only meaningful if \u0027--polyA\u0027 is specified", + "help_text": "Type: `file`, example: `transcript_ids.txt`. Only meaningful if \u0027--polyA\u0027 is specified. Do not add poly(A) tails to those transcripts listed in this file containing a list of transcript_ids." + + } + + + , + "bowtie": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Build Bowtie indices", + "help_text": "Type: `boolean_true`, default: `false`. Build Bowtie indices." + , + "default":false + } + + + , + "bowtie2": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Build Bowtie 2 indices", + "help_text": "Type: `boolean_true`, default: `false`. Build Bowtie 2 indices." + , + "default":false + } + + + , + "star": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Build STAR indices", + "help_text": "Type: `boolean_true`, default: `false`. Build STAR indices." + , + "default":false + } + + + , + "star_sjdboverhang": { + "type": + "integer", + "description": "Type: `integer`, example: `100`. Length of the genomic sequence around annotated junction", + "help_text": "Type: `integer`, example: `100`. Length of the genomic sequence around annotated junction. It is only used for STAR to build splice junctions database and not needed for Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According to STAR\u0027s manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. In most cases, the default value of 100 will work as well as the ideal value. (Default is 100)" + + } + + + , + "hisat2_hca": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline", + "help_text": "Type: `boolean_true`, default: `false`. Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline." + , + "default":false + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Suppress the output of logging information", + "help_text": "Type: `boolean_true`, default: `false`. Suppress the output of logging information." + , + "default":false + } + + +} +}, + + + "prior-enhanced rsem options" : { + "title": "Prior-enhanced RSEM options", + "type": "object", + "description": "No description", + "properties": { + + + "prep_pRSEM": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. A Boolean indicating whether to prepare reference files for pRSEM, including building Bowtie indices for a genome and selecting training set isoforms", + "help_text": "Type: `boolean_true`, default: `false`. A Boolean indicating whether to prepare reference files for pRSEM, including building Bowtie indices for a genome and selecting training set isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced RSEM and the training set isoforms will be used for learning prior. A path to Bowtie executables and a mappability file in bigWig format are required when this option is on. Currently, Bowtie2 is not supported for prior-enhanced RSEM." + , + "default":false + } + + + , + "mappability_bigwig_file": { + "type": + "string", + "description": "Type: `file`. Full path to a whole-genome mappability file in bigWig format", + "help_text": "Type: `file`. Full path to a whole-genome mappability file in bigWig format. This file is required for running prior-enhanced RSEM. It is used for selecting a training set of isoforms for prior-learning. This file can be either downloaded from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One)." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/other options" + }, + + { + "$ref": "#/definitions/prior-enhanced rsem options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml new file mode 100644 index 0000000..43538ac --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml @@ -0,0 +1,216 @@ +name: "rseqc_bamstat" +namespace: "rseqc" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input_file" + alternatives: + - "-i" + description: "Input alignment file in BAM or SAM format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mapq" + alternatives: + - "-q" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads. Default: '30'.\n" + info: null + example: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Output file (txt) with mapping quality statistics." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Generate statistics from a bam file." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "rnaseq" +- "genomics" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts356" +links: + repository: "https://github.com/MonashBioinformaticsPlatform/RSeQC" + homepage: "https://rseqc.sourceforge.net/" + documentation: "https://rseqc.sourceforge.net/#bam-stat-py" + issue_tracker: "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.10" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + - type: "docker" + run: + - "echo \"RSeQC bam_stat.py: $(bam_stat.py --version | cut -d' ' -f2-)\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_bamstat/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_bamstat" + executable: "target/nextflow/rseqc/rseqc_bamstat/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/main.nf new file mode 100644 index 0000000..48bea7b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/main.nf @@ -0,0 +1,3846 @@ +// rseqc_bamstat v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_bamstat", + "namespace" : "rseqc", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input_file", + "alternatives" : [ + "-i" + ], + "description" : "Input alignment file in BAM or SAM format.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mapq", + "alternatives" : [ + "-q" + ], + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads. Default: '30'.\n", + "example" : [ + 30 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output file (txt) with mapping quality statistics.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Generate statistics from a bam file.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "rnaseq", + "genomics" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts356" + ] + }, + "links" : { + "repository" : "https://github.com/MonashBioinformaticsPlatform/RSeQC", + "homepage" : "https://rseqc.sourceforge.net/", + "documentation" : "https://rseqc.sourceforge.net/#bam-stat-py", + "issue_tracker" : "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "RSeQC" + ], + "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSeQC bam_stat.py: $(bam_stat.py --version | cut -d' ' -f2-)\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/rseqc_bamstat/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_bamstat", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT_FILE+x} ]; then echo "${VIASH_PAR_INPUT_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_file='&'#" ; else echo "# par_input_file="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPQ+x} ]; then echo "${VIASH_PAR_MAPQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapq='&'#" ; else echo "# par_mapq="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + + +set -eo pipefail + +bam_stat.py \\\\ + --input-file "\\${par_input_file}" \\\\ + \\${par_mapq:+--mapq "\\${par_mapq}"} \\\\ +> \\$par_output +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/rseqc/rseqc_bamstat", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow.config new file mode 100644 index 0000000..228d21c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'rseqc/rseqc_bamstat' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Generate statistics from a bam file.' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json new file mode 100644 index 0000000..4b31a3d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_bamstat", +"description": "Generate statistics from a bam file.", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input_file": { + "type": + "string", + "description": "Type: `file`, required. Input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. Input alignment file in BAM or SAM format." + + } + + + , + "mapq": { + "type": + "integer", + "description": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads", + "help_text": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads. Default: \u002730\u0027.\n" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output`. Output file (txt) with mapping quality statistics", + "help_text": "Type: `file`, default: `$id.$key.output`. Output file (txt) with mapping quality statistics." + , + "default":"$id.$key.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml new file mode 100644 index 0000000..5f0b9ad --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml @@ -0,0 +1,242 @@ +name: "rseqc_inferexperiment" +namespace: "rseqc" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input_file" + alternatives: + - "-i" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + alternatives: + - "-r" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Output file (txt) of strandness report." + info: null + example: + - "$id.strandedness.txt" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "integer" + name: "--sample_size" + alternatives: + - "-s" + description: "Number of reads sampled from SAM/BAM file. Default: 200000\n" + info: null + example: + - 200000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mapq" + alternatives: + - "-q" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads. Default: 30\n" + info: null + example: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Infer strandedness from sequencing reads\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts356" +links: + repository: "https://github.com/MonashBioinformaticsPlatform/RSeQC" + homepage: "https://rseqc.sourceforge.net/" + documentation: "https://rseqc.sourceforge.net/#infer-experiment-py" + issue_tracker: "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.10" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + - type: "docker" + run: + - "echo \"RSeQC - infer_experiment.py: $(infer_experiment.py --version | cut -d'\ + \ ' -f2)\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_inferexperiment/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_inferexperiment" + executable: "target/nextflow/rseqc/rseqc_inferexperiment/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/main.nf new file mode 100644 index 0000000..a3f6b09 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/main.nf @@ -0,0 +1,3882 @@ +// rseqc_inferexperiment v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_inferexperiment", + "namespace" : "rseqc", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input_file", + "alternatives" : [ + "-i" + ], + "description" : "input alignment file in BAM or SAM format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--refgene", + "alternatives" : [ + "-r" + ], + "description" : "Reference gene model in bed format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output file (txt) of strandness report.", + "example" : [ + "$id.strandedness.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--sample_size", + "alternatives" : [ + "-s" + ], + "description" : "Number of reads sampled from SAM/BAM file. Default: 200000\n", + "example" : [ + 200000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mapq", + "alternatives" : [ + "-q" + ], + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads. Default: 30\n", + "example" : [ + 30 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Infer strandedness from sequencing reads\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts356" + ] + }, + "links" : { + "repository" : "https://github.com/MonashBioinformaticsPlatform/RSeQC", + "homepage" : "https://rseqc.sourceforge.net/", + "documentation" : "https://rseqc.sourceforge.net/#infer-experiment-py", + "issue_tracker" : "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "RSeQC" + ], + "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSeQC - infer_experiment.py: $(infer_experiment.py --version | cut -d' ' -f2)\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/rseqc_inferexperiment/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_inferexperiment", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT_FILE+x} ]; then echo "${VIASH_PAR_INPUT_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_file='&'#" ; else echo "# par_input_file="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPQ+x} ]; then echo "${VIASH_PAR_MAPQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapq='&'#" ; else echo "# par_mapq="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +infer_experiment.py \\\\ + -i \\$par_input_file \\\\ + -r \\$par_refgene \\\\ + \\${par_sample_size:+-s "\\${par_sample_size}"} \\\\ + \\${par_mapq:+-q "\\${par_mapq}"} \\\\ +> \\$par_output +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/rseqc/rseqc_inferexperiment", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow.config new file mode 100644 index 0000000..ebaf2bc --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'rseqc/rseqc_inferexperiment' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Infer strandedness from sequencing reads\n' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json new file mode 100644 index 0000000..1ab703c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json @@ -0,0 +1,139 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_inferexperiment", +"description": "Infer strandedness from sequencing reads\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input_file": { + "type": + "string", + "description": "Type: `file`, required. input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. input alignment file in BAM or SAM format" + + } + + + , + "refgene": { + "type": + "string", + "description": "Type: `file`, required. Reference gene model in bed format", + "help_text": "Type: `file`, required. Reference gene model in bed format" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.txt`, example: `$id.strandedness.txt`. Output file (txt) of strandness report", + "help_text": "Type: `file`, required, default: `$id.$key.output.txt`, example: `$id.strandedness.txt`. Output file (txt) of strandness report." + , + "default":"$id.$key.output.txt" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "sample_size": { + "type": + "integer", + "description": "Type: `integer`, example: `200000`. Number of reads sampled from SAM/BAM file", + "help_text": "Type: `integer`, example: `200000`. Number of reads sampled from SAM/BAM file. Default: 200000\n" + + } + + + , + "mapq": { + "type": + "integer", + "description": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads", + "help_text": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads. Default: 30\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml new file mode 100644 index 0000000..fd92879 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml @@ -0,0 +1,335 @@ +name: "rseqc_inner_distance" +namespace: "rseqc" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input_file" + alternatives: + - "-i" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + alternatives: + - "-r" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sample_size" + alternatives: + - "-k" + description: "Numer of reads sampled from SAM/BAM file, default = 1000000." + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mapq" + alternatives: + - "-q" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default=30." + info: null + example: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--lower_bound" + alternatives: + - "-l" + description: "Lower bound of inner distance (bp). This option is used for ploting\ + \ histograme, default=-250." + info: null + example: + - -250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--upper_bound" + alternatives: + - "-u" + description: "Upper bound of inner distance (bp). This option is used for ploting\ + \ histograme, default=250." + info: null + example: + - 250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--step" + alternatives: + - "-s" + description: "Step size (bp) of histograme. This option is used for plotting histogram,\ + \ default=5." + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "string" + name: "--output_prefix" + alternatives: + - "-o" + description: "Rrefix of output files." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_stats" + description: "output file (txt) with summary statistics of inner distances of\ + \ paired reads" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_dist" + description: "output file (txt) with inner distances of all paired reads" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_freq" + description: "output file (txt) with frequencies of inner distances of all paired\ + \ reads" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plot" + description: "output file (pdf) with histogram plot of of inner distances of all\ + \ paired reads" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plot_r" + description: "output file (R) with script of histogram plot of of inner distances\ + \ of all paired reads" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Calculate inner distance between read pairs.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts356" +links: + repository: "https://github.com/MonashBioinformaticsPlatform/RSeQC" + homepage: "https://rseqc.sourceforge.net/" + documentation: "https://rseqc.sourceforge.net/#inner-distance-py" + issue_tracker: "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.10" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "r-base" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + - type: "docker" + run: + - "echo \"RSeQC - inner_distance.py: $(inner_distance.py --version | cut -d' '\ + \ -f2)\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_inner_distance/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_inner_distance" + executable: "target/nextflow/rseqc/rseqc_inner_distance/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/main.nf new file mode 100644 index 0000000..338bd57 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/main.nf @@ -0,0 +1,4005 @@ +// rseqc_inner_distance v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_inner_distance", + "namespace" : "rseqc", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input_file", + "alternatives" : [ + "-i" + ], + "description" : "input alignment file in BAM or SAM format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--refgene", + "alternatives" : [ + "-r" + ], + "description" : "Reference gene model in bed format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sample_size", + "alternatives" : [ + "-k" + ], + "description" : "Numer of reads sampled from SAM/BAM file, default = 1000000.", + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mapq", + "alternatives" : [ + "-q" + ], + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.", + "example" : [ + 30 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--lower_bound", + "alternatives" : [ + "-l" + ], + "description" : "Lower bound of inner distance (bp). This option is used for ploting histograme, default=-250.", + "example" : [ + -250 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--upper_bound", + "alternatives" : [ + "-u" + ], + "description" : "Upper bound of inner distance (bp). This option is used for ploting histograme, default=250.", + "example" : [ + 250 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--step", + "alternatives" : [ + "-s" + ], + "description" : "Step size (bp) of histograme. This option is used for plotting histogram, default=5.", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "string", + "name" : "--output_prefix", + "alternatives" : [ + "-o" + ], + "description" : "Rrefix of output files.", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_stats", + "description" : "output file (txt) with summary statistics of inner distances of paired reads", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_dist", + "description" : "output file (txt) with inner distances of all paired reads", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_freq", + "description" : "output file (txt) with frequencies of inner distances of all paired reads", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_plot", + "description" : "output file (pdf) with histogram plot of of inner distances of all paired reads", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_plot_r", + "description" : "output file (R) with script of histogram plot of of inner distances of all paired reads", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Calculate inner distance between read pairs.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts356" + ] + }, + "links" : { + "repository" : "https://github.com/MonashBioinformaticsPlatform/RSeQC", + "homepage" : "https://rseqc.sourceforge.net/", + "documentation" : "https://rseqc.sourceforge.net/#inner-distance-py", + "issue_tracker" : "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "r-base" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "RSeQC" + ], + "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSeQC - inner_distance.py: $(inner_distance.py --version | cut -d' ' -f2)\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/rseqc_inner_distance/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_inner_distance", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT_FILE+x} ]; then echo "${VIASH_PAR_INPUT_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_file='&'#" ; else echo "# par_input_file="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPQ+x} ]; then echo "${VIASH_PAR_MAPQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapq='&'#" ; else echo "# par_mapq="; fi ) +$( if [ ! -z ${VIASH_PAR_LOWER_BOUND+x} ]; then echo "${VIASH_PAR_LOWER_BOUND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_lower_bound='&'#" ; else echo "# par_lower_bound="; fi ) +$( if [ ! -z ${VIASH_PAR_UPPER_BOUND+x} ]; then echo "${VIASH_PAR_UPPER_BOUND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_upper_bound='&'#" ; else echo "# par_upper_bound="; fi ) +$( if [ ! -z ${VIASH_PAR_STEP+x} ]; then echo "${VIASH_PAR_STEP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_step='&'#" ; else echo "# par_step="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PREFIX+x} ]; then echo "${VIASH_PAR_OUTPUT_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_prefix='&'#" ; else echo "# par_output_prefix="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_OUTPUT_STATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_stats='&'#" ; else echo "# par_output_stats="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DIST+x} ]; then echo "${VIASH_PAR_OUTPUT_DIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dist='&'#" ; else echo "# par_output_dist="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FREQ+x} ]; then echo "${VIASH_PAR_OUTPUT_FREQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_freq='&'#" ; else echo "# par_output_freq="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plot='&'#" ; else echo "# par_output_plot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT_R}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plot_r='&'#" ; else echo "# par_output_plot_r="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -exo pipefail + + +inner_distance.py \\\\ + -i \\$par_input_file \\\\ + -r \\$par_refgene \\\\ + -o \\$par_output_prefix \\\\ + \\${par_sample_size:+-k "\\${par_sample_size}"} \\\\ + \\${par_lower_bound:+-l "\\${par_lower_bound}"} \\\\ + \\${par_upper_bound:+-u "\\${par_upper_bound}"} \\\\ + \\${par_step:+-s "\\${par_step}"} \\\\ + \\${par_mapq:+-q "\\${par_mapq}"} \\\\ +> stdout.txt + +if [[ -n \\$par_output_stats ]]; then head -n 2 stdout.txt > \\$par_output_stats; fi + + +[[ -n "\\$par_output_dist" && -f "\\$par_output_prefix.inner_distance.txt" ]] && mv \\$par_output_prefix.inner_distance.txt \\$par_output_dist +[[ -n "\\$par_output_plot" && -f "\\$par_output_prefix.inner_distance_plot.pdf" ]] && mv \\$par_output_prefix.inner_distance_plot.pdf \\$par_output_plot +[[ -n "\\$par_output_plot_r" && -f "\\$par_output_prefix.inner_distance_plot.r" ]] && mv \\$par_output_prefix.inner_distance_plot.r \\$par_output_plot_r +[[ -n "\\$par_output_freq" && -f "\\$par_output_prefix.inner_distance_freq.txt" ]] && mv \\$par_output_prefix.inner_distance_freq.txt \\$par_output_freq + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/rseqc/rseqc_inner_distance", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow.config new file mode 100644 index 0000000..07c5da9 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'rseqc/rseqc_inner_distance' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Calculate inner distance between read pairs.\n' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json new file mode 100644 index 0000000..6bcd0c8 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json @@ -0,0 +1,209 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_inner_distance", +"description": "Calculate inner distance between read pairs.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input_file": { + "type": + "string", + "description": "Type: `file`, required. input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. input alignment file in BAM or SAM format" + + } + + + , + "refgene": { + "type": + "string", + "description": "Type: `file`, required. Reference gene model in bed format", + "help_text": "Type: `file`, required. Reference gene model in bed format" + + } + + + , + "sample_size": { + "type": + "integer", + "description": "Type: `integer`, example: `1000000`. Numer of reads sampled from SAM/BAM file, default = 1000000", + "help_text": "Type: `integer`, example: `1000000`. Numer of reads sampled from SAM/BAM file, default = 1000000." + + } + + + , + "mapq": { + "type": + "integer", + "description": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", + "help_text": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." + + } + + + , + "lower_bound": { + "type": + "integer", + "description": "Type: `integer`, example: `-250`. Lower bound of inner distance (bp)", + "help_text": "Type: `integer`, example: `-250`. Lower bound of inner distance (bp). This option is used for ploting histograme, default=-250." + + } + + + , + "upper_bound": { + "type": + "integer", + "description": "Type: `integer`, example: `250`. Upper bound of inner distance (bp)", + "help_text": "Type: `integer`, example: `250`. Upper bound of inner distance (bp). This option is used for ploting histograme, default=250." + + } + + + , + "step": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. Step size (bp) of histograme", + "help_text": "Type: `integer`, example: `5`. Step size (bp) of histograme. This option is used for plotting histogram, default=5." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_prefix": { + "type": + "string", + "description": "Type: `string`, required. Rrefix of output files", + "help_text": "Type: `string`, required. Rrefix of output files." + + } + + + , + "output_stats": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_stats`. output file (txt) with summary statistics of inner distances of paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_stats`. output file (txt) with summary statistics of inner distances of paired reads" + , + "default":"$id.$key.output_stats" + } + + + , + "output_dist": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_dist`. output file (txt) with inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_dist`. output file (txt) with inner distances of all paired reads" + , + "default":"$id.$key.output_dist" + } + + + , + "output_freq": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_freq`. output file (txt) with frequencies of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_freq`. output file (txt) with frequencies of inner distances of all paired reads" + , + "default":"$id.$key.output_freq" + } + + + , + "output_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads" + , + "default":"$id.$key.output_plot" + } + + + , + "output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_plot_r`. output file (R) with script of histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_plot_r`. output file (R) with script of histogram plot of of inner distances of all paired reads" + , + "default":"$id.$key.output_plot_r" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/.config.vsh.yaml new file mode 100644 index 0000000..6cbda7d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/.config.vsh.yaml @@ -0,0 +1,318 @@ +name: "salmon_index" +namespace: "salmon" +version: "v0.3.1" +authors: +- name: "Sai Nirmayi Yasa" + roles: + - "author" + - "maintainer" + info: + links: + email: "nirmayi@data-intuitive.com" + github: "sainirmayi" + linkedin: "sai-nirmayi-yasa" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Junior Bioinformatics Researcher" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--genome" + description: "Genome of the organism to prepare the set of decoy sequences. Required\ + \ to build decoy-aware transcriptome.\n" + info: null + example: + - "genome.fasta" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcripts" + alternatives: + - "-t" + description: "Transcript fasta file.\n" + info: null + example: + - "transcriptome.fasta" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--kmer_len" + alternatives: + - "-k" + description: "The size of k-mers that should be used for the quasi index.\n" + info: null + example: + - 31 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--gencode" + description: "This flag will expect the input transcript fasta to be in GENCODE\ + \ format, and will split the transcript name at the first '|' character. These\ + \ reduced names will be used in the output and when looking for these transcripts\ + \ in a gene to transcript GTF.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--features" + description: "This flag will expect the input reference to be in the tsv file\ + \ format, and will split the feature name at the first 'tab' character. These\ + \ reduced names will be used in the output and when looking for the sequence\ + \ of the features.GTF.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--keep_duplicates" + description: "This flag will disable the default indexing behavior of discarding\ + \ sequence-identical duplicate transcripts. If this flag is passed, then duplicate\ + \ transcripts that appear in the input will be retained and quantified separately.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--keep_fixed_fasta" + description: "Retain the fixed fasta file (without short transcripts and duplicates,\ + \ clipped, etc.) generated during indexing.\n" + info: null + direction: "input" + - type: "integer" + name: "--filter_size" + alternatives: + - "-f" + description: "The size of the Bloom filter that will be used by TwoPaCo during\ + \ indexing. The filter will be of size 2^{filter_size}. The default value of\ + \ -1 means that the filter size will be automatically set based on the number\ + \ of distinct k-mers in the input, as estimated by nthll.\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sparse" + description: "Build the index using a sparse sampling of k-mer positions This\ + \ will require less memory (especially during quantification), but will take\ + \ longer to construct and can slow down mapping / alignment.\n" + info: null + direction: "input" + - type: "file" + name: "--decoys" + alternatives: + - "-d" + description: "Treat these sequences ids from the reference as the decoys that\ + \ may have sequence homologous to some known transcript. For example in case\ + \ of the genome, provide a list of chromosome names (one per line).\n" + info: null + example: + - "decoys.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--no_clip" + description: "Don't clip poly-A tails from the ends of target sequences.\n" + info: null + direction: "input" + - type: "string" + name: "--type" + alternatives: + - "-n" + description: "The type of index to build; the only option is \"puff\" in this\ + \ version of salmon.\n" + info: null + example: + - "puff" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--index" + alternatives: + - "-i" + description: "Salmon index\n" + info: null + example: + - "Salmon_index" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Salmon is a tool for wicked-fast transcript quantification from RNA-seq\ + \ data. It can either make use of pre-computed alignments (in the form of a SAM/BAM\ + \ file) to the transcripts rather than the raw reads, or can be run in the mapping-based\ + \ mode. This component creates a salmon index for the transcriptome to use Salmon\ + \ in the mapping-based mode. It is generally recommend that you build a decoy-aware\ + \ transcriptome file. This is done using the entire genome of the organism as the\ + \ decoy sequence by concatenating the genome to the end of the transcriptome to\ + \ be indexed and populating the decoys.txt file with the chromosome names.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "Transcriptome" +- "Index" +license: "GPL-3.0" +references: + doi: + - "10.1038/nmeth.4197" +links: + repository: "https://github.com/COMBINE-lab/salmon" + homepage: "https://salmon.readthedocs.io/en/latest/salmon.html" + documentation: "https://salmon.readthedocs.io/en/latest/salmon.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/salmon:1.10.2--hecfa306_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "salmon index -v 2>&1 | sed 's/salmon \\([0-9.]*\\)/salmon: \\1/' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/salmon/salmon_index/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/salmon/salmon_index" + executable: "target/nextflow/salmon/salmon_index/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/main.nf new file mode 100644 index 0000000..35e1683 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/main.nf @@ -0,0 +1,3994 @@ +// salmon_index v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Sai Nirmayi Yasa (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "salmon_index", + "namespace" : "salmon", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Sai Nirmayi Yasa", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "nirmayi@data-intuitive.com", + "github" : "sainirmayi", + "linkedin" : "sai-nirmayi-yasa" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Junior Bioinformatics Researcher" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--genome", + "description" : "Genome of the organism to prepare the set of decoy sequences. Required to build decoy-aware transcriptome.\n", + "example" : [ + "genome.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcripts", + "alternatives" : [ + "-t" + ], + "description" : "Transcript fasta file.\n", + "example" : [ + "transcriptome.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--kmer_len", + "alternatives" : [ + "-k" + ], + "description" : "The size of k-mers that should be used for the quasi index.\n", + "example" : [ + 31 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--gencode", + "description" : "This flag will expect the input transcript fasta to be in GENCODE format, and will split the transcript name at the first '|' character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--features", + "description" : "This flag will expect the input reference to be in the tsv file format, and will split the feature name at the first 'tab' character. These reduced names will be used in the output and when looking for the sequence of the features.GTF.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--keep_duplicates", + "description" : "This flag will disable the default indexing behavior of discarding sequence-identical duplicate transcripts. If this flag is passed, then duplicate transcripts that appear in the input will be retained and quantified separately.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--keep_fixed_fasta", + "description" : "Retain the fixed fasta file (without short transcripts and duplicates, clipped, etc.) generated during indexing.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--filter_size", + "alternatives" : [ + "-f" + ], + "description" : "The size of the Bloom filter that will be used by TwoPaCo during indexing. The filter will be of size 2^{filter_size}. The default value of -1 means that the filter size will be automatically set based on the number of distinct k-mers in the input, as estimated by nthll.\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sparse", + "description" : "Build the index using a sparse sampling of k-mer positions This will require less memory (especially during quantification), but will take longer to construct and can slow down mapping / alignment.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--decoys", + "alternatives" : [ + "-d" + ], + "description" : "Treat these sequences ids from the reference as the decoys that may have sequence homologous to some known transcript. For example in case of the genome, provide a list of chromosome names (one per line).\n", + "example" : [ + "decoys.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--no_clip", + "description" : "Don't clip poly-A tails from the ends of target sequences.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--type", + "alternatives" : [ + "-n" + ], + "description" : "The type of index to build; the only option is \\"puff\\" in this version of salmon.\n", + "example" : [ + "puff" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--index", + "alternatives" : [ + "-i" + ], + "description" : "Salmon index\n", + "example" : [ + "Salmon_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It can either make use of pre-computed alignments (in the form of a SAM/BAM file) to the transcripts rather than the raw reads, or can be run in the mapping-based mode. This component creates a salmon index for the transcriptome to use Salmon in the mapping-based mode. It is generally recommend that you build a decoy-aware transcriptome file. This is done using the entire genome of the organism as the decoy sequence by concatenating the genome to the end of the transcriptome to be indexed and populating the decoys.txt file with the chromosome names.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Transcriptome", + "Index" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1038/nmeth.4197" + ] + }, + "links" : { + "repository" : "https://github.com/COMBINE-lab/salmon", + "homepage" : "https://salmon.readthedocs.io/en/latest/salmon.html", + "documentation" : "https://salmon.readthedocs.io/en/latest/salmon.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/salmon:1.10.2--hecfa306_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "salmon index -v 2>&1 | sed 's/salmon \\\\([0-9.]*\\\\)/salmon: \\\\1/' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/salmon/salmon_index/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/salmon/salmon_index", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -e + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME+x} ]; then echo "${VIASH_PAR_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome='&'#" ; else echo "# par_genome="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcripts='&'#" ; else echo "# par_transcripts="; fi ) +$( if [ ! -z ${VIASH_PAR_KMER_LEN+x} ]; then echo "${VIASH_PAR_KMER_LEN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmer_len='&'#" ; else echo "# par_kmer_len="; fi ) +$( if [ ! -z ${VIASH_PAR_GENCODE+x} ]; then echo "${VIASH_PAR_GENCODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gencode='&'#" ; else echo "# par_gencode="; fi ) +$( if [ ! -z ${VIASH_PAR_FEATURES+x} ]; then echo "${VIASH_PAR_FEATURES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_features='&'#" ; else echo "# par_features="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_DUPLICATES+x} ]; then echo "${VIASH_PAR_KEEP_DUPLICATES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_duplicates='&'#" ; else echo "# par_keep_duplicates="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_FIXED_FASTA+x} ]; then echo "${VIASH_PAR_KEEP_FIXED_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_fixed_fasta='&'#" ; else echo "# par_keep_fixed_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTER_SIZE+x} ]; then echo "${VIASH_PAR_FILTER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filter_size='&'#" ; else echo "# par_filter_size="; fi ) +$( if [ ! -z ${VIASH_PAR_SPARSE+x} ]; then echo "${VIASH_PAR_SPARSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sparse='&'#" ; else echo "# par_sparse="; fi ) +$( if [ ! -z ${VIASH_PAR_DECOYS+x} ]; then echo "${VIASH_PAR_DECOYS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_decoys='&'#" ; else echo "# par_decoys="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_CLIP+x} ]; then echo "${VIASH_PAR_NO_CLIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_clip='&'#" ; else echo "# par_no_clip="; fi ) +$( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "${VIASH_PAR_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_type='&'#" ; else echo "# par_type="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +unset_if_false=( + par_gencode + par_features + par_keep_duplicates + par_keep_fixed_fasta + par_sparse + par_no_clip +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" "\\${meta_name}_XXXXXX") +mkdir -p "\\$tmp_dir/temp" + +if [[ -f "\\$par_genome" ]] && [[ ! "\\$par_decoys" ]]; then + filename="\\$(basename -- \\$par_genome)" + decoys="decoys.txt" + if [ \\${filename##*.} == "gz" ]; then + grep '^>' <(gunzip -c \\$par_genome) | cut -d ' ' -f 1 > \\$decoys + gentrome="gentrome.fa.gz" + else + grep '^>' \\$par_genome | cut -d ' ' -f 1 > \\$decoys + gentrome="gentrome.fa" + fi + sed -i.bak -e 's/>//g' \\$decoys + cat \\$par_transcripts \\$par_genome > \\$gentrome +else + gentrome=\\$par_transcripts + decoys=\\$par_decoys +fi + +salmon index \\\\ + -t "\\$gentrome" \\\\ + --tmpdir "\\$tmp_dir/temp" \\\\ + \\${meta_cpus:+--threads "\\${meta_cpus}"} \\\\ + -i "\\$par_index" \\\\ + \\${par_kmer_len:+-k "\\${par_kmer_len}"} \\\\ + \\${par_gencode:+--gencode} \\\\ + \\${par_features:+--features} \\\\ + \\${par_keep_duplicates:+--keepDuplicates} \\\\ + \\${par_keep_fixed_fasta:+--keepFixedFasta} \\\\ + \\${par_filter_size:+-f "\\${par_filter_size}"} \\\\ + \\${par_sparse:+--sparse} \\\\ + \\${decoys:+-d "\\${decoys}"} \\\\ + \\${par_no_clip:+--no-clip} \\\\ + \\${par_type:+--type "\\${par_type}"} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/salmon/salmon_index", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow.config new file mode 100644 index 0000000..98af638 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'salmon/salmon_index' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It can either make use of pre-computed alignments (in the form of a SAM/BAM file) to the transcripts rather than the raw reads, or can be run in the mapping-based mode. This component creates a salmon index for the transcriptome to use Salmon in the mapping-based mode. It is generally recommend that you build a decoy-aware transcriptome file. This is done using the entire genome of the organism as the decoy sequence by concatenating the genome to the end of the transcriptome to be indexed and populating the decoys.txt file with the chromosome names.\n' + author = 'Sai Nirmayi Yasa' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow_schema.json new file mode 100644 index 0000000..5e67d15 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/nextflow_schema.json @@ -0,0 +1,211 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "salmon_index", +"description": "Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It can either make use of pre-computed alignments (in the form of a SAM/BAM file) to the transcripts rather than the raw reads, or can be run in the mapping-based mode. This component creates a salmon index for the transcriptome to use Salmon in the mapping-based mode. It is generally recommend that you build a decoy-aware transcriptome file. This is done using the entire genome of the organism as the decoy sequence by concatenating the genome to the end of the transcriptome to be indexed and populating the decoys.txt file with the chromosome names.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "genome": { + "type": + "string", + "description": "Type: `file`, example: `genome.fasta`. Genome of the organism to prepare the set of decoy sequences", + "help_text": "Type: `file`, example: `genome.fasta`. Genome of the organism to prepare the set of decoy sequences. Required to build decoy-aware transcriptome.\n" + + } + + + , + "transcripts": { + "type": + "string", + "description": "Type: `file`, required, example: `transcriptome.fasta`. Transcript fasta file", + "help_text": "Type: `file`, required, example: `transcriptome.fasta`. Transcript fasta file.\n" + + } + + + , + "kmer_len": { + "type": + "integer", + "description": "Type: `integer`, example: `31`. The size of k-mers that should be used for the quasi index", + "help_text": "Type: `integer`, example: `31`. The size of k-mers that should be used for the quasi index.\n" + + } + + + , + "gencode": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This flag will expect the input transcript fasta to be in GENCODE format, and will split the transcript name at the first \u0027|\u0027 character", + "help_text": "Type: `boolean_true`, default: `false`. This flag will expect the input transcript fasta to be in GENCODE format, and will split the transcript name at the first \u0027|\u0027 character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF.\n" + , + "default":false + } + + + , + "features": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This flag will expect the input reference to be in the tsv file format, and will split the feature name at the first \u0027tab\u0027 character", + "help_text": "Type: `boolean_true`, default: `false`. This flag will expect the input reference to be in the tsv file format, and will split the feature name at the first \u0027tab\u0027 character. These reduced names will be used in the output and when looking for the sequence of the features.GTF.\n" + , + "default":false + } + + + , + "keep_duplicates": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This flag will disable the default indexing behavior of discarding sequence-identical duplicate transcripts", + "help_text": "Type: `boolean_true`, default: `false`. This flag will disable the default indexing behavior of discarding sequence-identical duplicate transcripts. If this flag is passed, then duplicate transcripts that appear in the input will be retained and quantified separately.\n" + , + "default":false + } + + + , + "keep_fixed_fasta": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Retain the fixed fasta file (without short transcripts and duplicates, clipped, etc", + "help_text": "Type: `boolean_true`, default: `false`. Retain the fixed fasta file (without short transcripts and duplicates, clipped, etc.) generated during indexing.\n" + , + "default":false + } + + + , + "filter_size": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. The size of the Bloom filter that will be used by TwoPaCo during indexing", + "help_text": "Type: `integer`, example: `-1`. The size of the Bloom filter that will be used by TwoPaCo during indexing. The filter will be of size 2^{filter_size}. The default value of -1 means that the filter size will be automatically set based on the number of distinct k-mers in the input, as estimated by nthll.\n" + + } + + + , + "sparse": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Build the index using a sparse sampling of k-mer positions This will require less memory (especially during quantification), but will take longer to construct and can slow down mapping / alignment", + "help_text": "Type: `boolean_true`, default: `false`. Build the index using a sparse sampling of k-mer positions This will require less memory (especially during quantification), but will take longer to construct and can slow down mapping / alignment.\n" + , + "default":false + } + + + , + "decoys": { + "type": + "string", + "description": "Type: `file`, example: `decoys.txt`. Treat these sequences ids from the reference as the decoys that may have sequence homologous to some known transcript", + "help_text": "Type: `file`, example: `decoys.txt`. Treat these sequences ids from the reference as the decoys that may have sequence homologous to some known transcript. For example in case of the genome, provide a list of chromosome names (one per line).\n" + + } + + + , + "no_clip": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t clip poly-A tails from the ends of target sequences", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t clip poly-A tails from the ends of target sequences.\n" + , + "default":false + } + + + , + "type": { + "type": + "string", + "description": "Type: `string`, example: `puff`. The type of index to build; the only option is \"puff\" in this version of salmon", + "help_text": "Type: `string`, example: `puff`. The type of index to build; the only option is \"puff\" in this version of salmon.\n" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.index`, example: `Salmon_index`. Salmon index\n", + "help_text": "Type: `file`, required, default: `$id.$key.index`, example: `Salmon_index`. Salmon index\n" + , + "default":"$id.$key.index" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/.config.vsh.yaml new file mode 100644 index 0000000..49dd9ef --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/.config.vsh.yaml @@ -0,0 +1,1214 @@ +name: "salmon_quant" +namespace: "salmon" +version: "v0.3.1" +authors: +- name: "Sai Nirmayi Yasa" + roles: + - "author" + - "maintainer" + info: + links: + email: "nirmayi@data-intuitive.com" + github: "sainirmayi" + linkedin: "sai-nirmayi-yasa" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Junior Bioinformatics Researcher" +argument_groups: +- name: "Common input options" + arguments: + - type: "string" + name: "--lib_type" + alternatives: + - "-l" + description: "Format string describing the library.\nThe library type string consists\ + \ of three parts: \n1. Relative orientation of the reads: This part is only\ + \ provided if the library is paired-end, The possible options are\n I = inward\n\ + \ O = outward\n M = matching\n2. Strandedness of the library: This part specifies\ + \ whether the protocol is stranded or unstranded. The options are:\n S = stranded\n\ + \ U = unstranded\n3. Directionality of the reads: If the library is stranded,\ + \ the final part of the library string is used to specify the strand from which\ + \ the read originates. The possible values are\n F = read 1 (or single-end\ + \ read) comes from the forward strand\n R = read 1 (or single-end read) comes\ + \ from the reverse strand\n" + info: null + default: + - "A" + required: false + choices: + - "A" + - "U" + - "SF" + - "SR" + - "IU" + - "IS" + - "ISF" + - "ISR" + - "OU" + - "OS" + - "OSF" + - "OSR" + - "MU" + - "MS" + - "MSF" + - "MSR" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Mapping input options" + arguments: + - type: "file" + name: "--index" + alternatives: + - "-i" + description: "Salmon index.\n" + info: null + example: + - "transcriptome_index" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unmated_reads" + alternatives: + - "-r" + description: "List of files containing unmated reads of (e.g. single-end reads).\n" + info: null + example: + - "sample.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--mates1" + alternatives: + - "-m1" + description: "File containing the #1 mates.\n" + info: null + example: + - "sample_1.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--mates2" + alternatives: + - "-m2" + description: "File containing the #2 mates.\n" + info: null + example: + - "sample_2.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Alignment input options" + arguments: + - type: "boolean_true" + name: "--discard_orphans" + description: "Discard orphan alignments in the input [for alignment-based mode\ + \ only]. If this flag is passed, then only paired alignments will be considered\ + \ toward quantification estimates. The default behavior is to consider orphan\ + \ alignments if no valid paired mappings exist.\n" + info: null + direction: "input" + - type: "file" + name: "--alignments" + alternatives: + - "-a" + description: "Input alignment (BAM) file(s).\n" + info: null + example: + - "sample.fq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--eqclasses" + alternatives: + - "-e" + description: "input salmon weighted equivalence class file.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--targets" + alternatives: + - "-t" + description: "FASTA format file containing target transcripts.\n" + info: null + example: + - "transcripts.fasta" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--ont" + description: "Use alignment model for Oxford Nanopore long reads\n" + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output quantification directory.\n" + info: null + example: + - "quant_output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_results" + description: "Salmon quantification file.\n" + info: null + example: + - "quant.sf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Basic options" + arguments: + - type: "boolean_true" + name: "--seq_bias" + description: "Perform sequence-specific bias correction.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--gc_bias" + description: "Perform fragment GC bias correction [beta for single-end reads].\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--pos_bias" + description: "Perform positional bias correction.\n" + info: null + direction: "input" + - type: "double" + name: "--incompat_prior" + description: "Set the prior probability that an alignment that disagrees with\ + \ the specified library type (--lib_type) results from the true fragment origin.\ + \ Setting this to 0 specifies that alignments that disagree with the library\ + \ type should be \"impossible\", while setting it to 1 says that alignments\ + \ that disagree with the library type are no less likely than those that do.\n" + info: null + example: + - 0.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_map" + alternatives: + - "-g" + description: "File containing a mapping of transcripts to genes. If this file\ + \ is provided salmon will output both quant.sf and quant.genes.sf files, where\ + \ the latter contains aggregated gene-level abundance estimates. The transcript\ + \ to gene mapping should be provided as either a GTF file, or a in a simple\ + \ tab-delimited format where each line contains the name of a transcript and\ + \ the gene to which it belongs separated by a tab. The extension of the file\ + \ is used to determine how the file should be parsed. Files ending in '.gtf',\ + \ '.gff' or '.gff3' are assumed to be in GTF format; files with any other extension\ + \ are assumed to be in the simple format. In GTF / GFF format, the \"transcript_id\"\ + \ is assumed to contain the transcript identifier and the \"gene_id\" is assumed\ + \ to contain the corresponding gene identifier.\n" + info: null + example: + - "gene_map.gtf" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--aux_target_file" + description: "A file containing a list of \"auxiliary\" targets. These are valid\ + \ targets (i.e., not decoys) to which fragments are allowed to map and be assigned,\ + \ and which will be quantified, but for which auxiliary models like sequence-specific\ + \ and fragment-GC bias correction should not be applied.\n" + info: null + example: + - "auxilary_targets.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--meta" + description: "If you're using Salmon on a metagenomic dataset, consider setting\ + \ this flag to disable parts of the abundance estimation model that make less\ + \ sense for metagenomic data.\n" + info: null + direction: "input" + - type: "double" + name: "--score_exp" + description: "The factor by which sub-optimal alignment scores are downweighted\ + \ to produce a probability. If the best alignment score for the current read\ + \ is S, and the score for a particular alignment is w, then the probability\ + \ will be computed porportional to exp( - scoreExp * (S-w) ).\n" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Options specific to mapping mode" + arguments: + - type: "boolean_true" + name: "--discard_orphans_quasi" + description: "[selective-alignment mode only] \nDiscard orphan mappings in selective-alignment\ + \ mode. If this flag is passed then only paired mappings will be considered\ + \ toward quantification estimates. The default behavior is to consider orphan\ + \ mappings if no valid paired mappings exist. This flag is independent of the\ + \ option to write the orphaned mappings to file (--writeOrphanLinks).\n" + info: null + direction: "input" + - type: "double" + name: "--consensus_slack" + description: "[selective-alignment mode only] \nThe amount of slack allowed in\ + \ the selective-alignment filtering mechanism. If this is set to a fraction,\ + \ X, greater than 0 (and in [0,1)), then uniMEM chains with scores below (100\ + \ * X)% of the best chain score for a read, and read pairs with a sum of chain\ + \ scores below (100 * X)% of the best chain score for a read pair will be discounted\ + \ as a mapping candidates. The default value of this option is 0.35.\n" + info: null + example: + - 0.35 + required: false + min: 0.0 + max: 0.999999999 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--pre_merge_chain_sub_thresh" + description: "[selective-alignment mode only] \nThe threshold of sub-optimal chains,\ + \ compared to the best chain on a given target, that will be retained and passed\ + \ to the next phase of mapping. Specifically, if the best chain for a read (or\ + \ read-end in paired-end mode) to target t has score X_t, then all chains for\ + \ this read with score >= X_t * preMergeChainSubThresh will be retained and\ + \ passed to subsequent mapping phases. This value must be in the range [0,\ + \ 1].\n" + info: null + example: + - 0.75 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--post_merge_chain_sub_thresh" + description: "[selective-alignment mode only] \nThe threshold of sub-optimal chains,\ + \ compared to the best chain on a given target, that will be retained and passed\ + \ to the next phase of mapping. This is different than post_merge_chain_sub_thresh,\ + \ because this is applied to pairs of chains (from the ends of paired-end reads)\ + \ after merging (i.e. after checking concordancy constraints etc.). Specifically,\ + \ if the best chain pair to target t has score X_t, then all chain pairs for\ + \ this read pair with score >= X_t * post_merge_chain_sub_thresh will be retained\ + \ and passed to subsequent mapping phases. This value must be in the range [0,\ + \ 1]. Note: This option is only meaningful for paired-end libraries, and is\ + \ ignored for single-end libraries.\n" + info: null + example: + - 0.9 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--orphan_chain_sub_thresh" + description: "[selective-alignment mode only]\nThis threshold sets a global sub-optimality\ + \ threshold for chains corresponding to orphan mappings. That is, if the merging\ + \ procedure results in no concordant mappings then only orphan mappings with\ + \ a chain score >= orphan_chain_sub_thresh * bestChainScore will be retained\ + \ and passed to subsequent mapping phases. This value must be in the range [0,\ + \ 1]. Note: This option is only meaningful for paired-end libraries, and is\ + \ ignored for single-end libraries.\n" + info: null + example: + - 0.95 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--min_score_fraction" + description: "[selective-alignment mode only]\nThe fraction of the optimal possible\ + \ alignment score that a mapping must achieve in order to be considered \"valid\"\ + \ --- should be in (0,1]. Default 0.65\n" + info: null + example: + - 0.65 + required: false + min: 1.0E-9 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mismatch_seed_skip" + description: "[selective-alignment mode only]\nAfter a k-mer hit is extended to\ + \ a uni-MEM, the uni-MEM extension can terminate for one of 3 reasons; the end\ + \ of the read, the end of the unitig, or a mismatch. If the extension ends because\ + \ of a mismatch, this is likely the result of a sequencing error. To avoid looking\ + \ up many k-mers that will likely fail to be located in the index, the search\ + \ procedure skips by a factor of mismatch_seed_skip until it either (1) finds\ + \ another match or (2) is k-bases past the mismatch position. This value controls\ + \ that skip length. A smaller value can increase sensitivity, while a larger\ + \ value can speed up seeding.\n" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--disable_chaining_heuristic" + description: "[selective-alignment mode only] \nBy default, the heuristic of (Li\ + \ 2018) is implemented, which terminates the chaining DP once a given number\ + \ of valid backpointers are found. This speeds up the seed (MEM) chaining step,\ + \ but may result in sub-optimal chains in complex situations (e.g. sequences\ + \ with many repeats and overlapping repeats). Passing this flag will disable\ + \ the chaining heuristic, and perform the full chaining dynamic program, guaranteeing\ + \ the optimal chain is found in this step.\n" + info: null + direction: "input" + - type: "double" + name: "--decoy_threshold" + description: "[selective-alignment mode only]\nFor an alignemnt to an annotated\ + \ transcript to be considered invalid, it must have an alignment score < (decoy_threshold\ + \ * bestDecoyScore). A value of 1.0 means that any alignment strictly worse\ + \ than the best decoy alignment will be discarded. A smaller value will allow\ + \ reads to be allocated to transcripts even if they strictly align better to\ + \ the decoy sequence.\n" + info: null + example: + - 1.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--ma" + description: "[selective-alignment mode only]\nThe value given to a match between\ + \ read and reference nucleotides in an alignment.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mp" + description: "[selective-alignment mode only]\nThe value given to a mis-match\ + \ between read and reference nucleotides in an alignment.\n" + info: null + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--go" + description: "[selective-alignment mode only]\nThe value given to a gap opening\ + \ in an alignment.\n" + info: null + example: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--ge" + description: "[selective-alignment mode only]\nThe value given to a gap extension\ + \ in an alignment.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bandwidth" + description: "[selective-alignment mode only]\nThe value used for the bandwidth\ + \ passed to ksw2. A smaller bandwidth can make the alignment verification run\ + \ more quickly, but could possibly miss valid alignments.\n" + info: null + example: + - 15 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--allow_dovetail" + description: "[selective-alignment mode only] \nAllow dovetailing mappings.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--recover_orphans" + description: "[selective-alignment mode only] \nAttempt to recover the mates of\ + \ orphaned reads. This uses edlib for orphan recovery, and so introduces some\ + \ computational overhead, but it can improve sensitivity.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--mimicBT2" + description: "[selective-alignment mode only] \nSet flags to mimic parameters\ + \ similar to Bowtie2 with --no-discordant and --no-mixed flags. This increases\ + \ disallows dovetailing reads, and discards orphans. Note, this does not impose\ + \ the very strict parameters assumed by RSEM+Bowtie2, like gapless alignments.\ + \ For that behavior, use the --mimic_strictBT2 flag below.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--mimic_strictBT2" + description: "[selective-alignment mode only] \nSet flags to mimic the very strict\ + \ parameters used by RSEM+Bowtie2. This increases --min_score_fraction to 0.8,\ + \ disallows dovetailing reads, discards orphans, and disallows gaps in alignments.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--softclip" + description: "[selective-alignment mode only] \nAllos soft-clipping of reads during\ + \ selective-alignment. If this option is provided, then regions at the beginning\ + \ or end of the read can be withheld from alignment without any effect on the\ + \ resulting score (i.e. neither adding nor removing from the score). This will\ + \ drastically reduce the penalty if there are mismatches at the beginning or\ + \ end of the read due to e.g. low-quality bases or adapters. NOTE: Even with\ + \ soft-clipping enabled, the read must still achieve a score of at least min_score_fraction\ + \ * maximum achievable score, where the maximum achievable score is computed\ + \ based on the full (un-clipped) read length.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--softclip_overhangs" + description: "[selective-alignment mode only] \nAllow soft-clipping of reads that\ + \ overhang the beginning or ends of the transcript. In this case, the overhaning\ + \ section of the read will simply be unaligned, and will not contribute or detract\ + \ from the alignment score. The default policy is to force an end-to-end alignment\ + \ of the entire read, so that overhanings will result in some deletion of nucleotides\ + \ from the read.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--full_length_alignment" + description: "[selective-alignment mode only] \nPerform selective alignment over\ + \ the full length of the read, beginning from the (approximate) initial mapping\ + \ location and using extension alignment. This is in contrast with the default\ + \ behavior which is to only perform alignment between the MEMs in the optimal\ + \ chain (and before the first and after the last MEM if applicable). The default\ + \ strategy forces the MEMs to belong to the alignment, but has the benefit that\ + \ it can discover indels prior to the first hit shared between the read and\ + \ reference. Except in very rare circumstances, the default mode should be more\ + \ accurate.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--hard_filter" + description: "[selective-alignment mode only] \nInstead of weighting mappings\ + \ by their alignment score, this flag will discard any mappings with sub-optimal\ + \ alignment score. The default option of soft-filtering (i.e. weighting mappings\ + \ by their alignment score) usually yields slightly more accurate abundance\ + \ estimates but this flag may be desirable if you want more accurate 'naive'\ + \ equivalence classes, rather than range factorized equivalence classes.\n" + info: null + direction: "input" + - type: "double" + name: "--min_aln_prob" + description: "The minimum number of fragments that must be assigned to the transcriptome\ + \ for quantification to proceed.\n" + info: null + example: + - 1.0E-5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--write_mappings" + alternatives: + - "-z" + description: "If this option is provided, then the selective-alignment results\ + \ will be written out in SAM-compatible format. By default, output will be directed\ + \ to stdout, but an alternative file name can be provided instead.\n" + info: null + direction: "input" + - type: "file" + name: "--mapping_sam" + description: "Path to file that should output the selective-alignment results\ + \ in SAM-compatible format. This option must be provided while using --write_mappings" + info: null + example: + - "mappings.sam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--write_qualities" + description: "This flag only has meaning if mappings are being written (with --write_mappings/-z).\ + \ If this flag is provided, then the output SAM file will contain quality strings\ + \ as well as read sequences. Note that this can greatly increase the size of\ + \ the output file.\n" + info: null + direction: "input" + - type: "string" + name: "--hit_filter_policy" + description: "[selective-alignment mode only]\nDetermines the policy by which\ + \ hits are filtered in selective alignment. Filtering hits after chaining (the\ + \ default) is more sensitive, but more computationally intensive, because it\ + \ performs the chaining dynamic program for all hits. Filtering before chaining\ + \ is faster, but some true hits may be missed. The options are BEFORE, AFTER,\ + \ BOTH and NONE.\n" + info: null + example: + - "AFTER" + required: false + choices: + - "BEFORE" + - "AFTER" + - "BOTH" + - "NONE" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Advance options" + arguments: + - type: "boolean_true" + name: "--alternative_init_mode" + description: "Use an alternative strategy (rather than simple interpolation between)\ + \ the online and uniform abundance estimates to initialize the EM / VBEM algorithm.\n" + info: null + direction: "input" + - type: "file" + name: "--aux_dir" + description: "The sub-directory of the quantification directory where auxiliary\ + \ information e.g. bootstraps, bias parameters, etc. will be written.\n" + info: null + example: + - "aux_info" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_quant" + description: "Skip performing the actual transcript quantification (including\ + \ any Gibbs sampling or bootstrapping).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--dump_eq" + description: "Dump the simple equivalence class counts that were computed during\ + \ mapping or alignment.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--dump_eq_weights" + alternatives: + - "-d" + description: "Dump conditional probabilities associated with transcripts when\ + \ equivalence class information is being dumped to file. Note, this will dump\ + \ the factorization that is actually used by salmon's offline phase for inference.\ + \ If you are using range-factorized equivalence classes (the default) then the\ + \ same transcript set may appear multiple times with different associated conditional\ + \ probabilities.\n" + info: null + direction: "input" + - type: "integer" + name: "--min_assigned_frags" + description: "The minimum number of fragments that must be assigned to the transcriptome\ + \ for quantification to proceed.\n" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--reduce_GC_memory" + description: "If this option is selected, a more memory efficient (but slightly\ + \ slower) representation is used to compute fragment GC content. Enabling this\ + \ will reduce memory usage, but can also reduce speed. However, the results\ + \ themselves will remain the same.\n" + info: null + direction: "input" + - type: "integer" + name: "--bias_speed_samp" + description: "The value at which the fragment length PMF is down-sampled when\ + \ evaluating sequence-specific & GC fragment bias. Larger values speed up effective\ + \ length correction, but may decrease the fidelity of bias modeling results.\n" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fld_max" + description: "The maximum fragment length to consider when building the empirical\ + \ distribution\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fld_mean" + description: "The mean used in the fragment length distribution prior\n" + info: null + example: + - 250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fld_SD" + description: "The standard deviation used in the fragment length distribution\ + \ prior\n" + info: null + example: + - 25 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--forgetting_factor" + alternatives: + - "-f" + description: "The forgetting factor used in the online learning schedule. A smallervalue\ + \ results in quicker learning, but higher variance and may be unstable. A larger\ + \ value results in slower learning but may be more stable. Value should be\ + \ in the interval (0.5, 1.0].\n" + info: null + example: + - 0.65 + required: false + min: 0.500000001 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--init_uniform" + description: "Initialize the offline inference with uniform parameters, rather\ + \ than seeding with online parameters.\n" + info: null + direction: "input" + - type: "integer" + name: "--max_occs_per_hit" + description: "When collecting \"hits\" (MEMs), hits having more than max_occs_per_hit\ + \ occurrences won't be considered.\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_read_occ" + description: "Reads \"mapping\" to more than this many places won't be considered.\n" + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--no_length_correction" + description: "Entirely disables length correction when estimating the abundance\ + \ of transcripts. This option can be used with protocols where one expects that\ + \ fragments derive from their underlying targets without regard to that target's\ + \ length (e.g. QuantSeq)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_effective_length_correction" + description: "Disables effective length correction when computing the probability\ + \ that a fragment was generated from a transcript. If this flag is passed in,the\ + \ fragment length distribution is not taken into account when computing this\ + \ probability.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_single_frag_prob" + description: "Disables the estimation of an associated fragment length probability\ + \ for single-end reads or for orphaned mappings in paired-end libraries. The\ + \ default behavior is to consider the probability of all possible fragment\ + \ lengths associated with the retained mapping. Enabling this flag (i.e. turning\ + \ this default behavior off) will simply not attempt to estimate a fragment\ + \ length probability in such cases.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_frag_length_dist" + description: "Don't consider concordance with the learned fragment length distribution\ + \ when trying to determine the probability that a fragment has originated from\ + \ a specified location. Normally, Fragments with unlikely lengths will be assigned\ + \ a smaller relative probability than those with more likely lengths. When this\ + \ flag is passed in, the observed fragment length has no effect on that fragment's\ + \ a priori probability.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_bias_length_threshold" + description: "If this option is enabled, then no (lower) threshold will be set\ + \ on how short bias correction can make effective lengths. This can increase\ + \ the precision of bias correction, but harm robustness. The default correction\ + \ applies a threshold.\n" + info: null + direction: "input" + - type: "integer" + name: "--num_bias_samples" + description: "Number of fragment mappings to use when learning the sequence-specific\ + \ bias model.\n" + info: null + example: + - 2000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_aux_model_samples" + description: "The first are used to train the auxiliary\ + \ model parameters (e.g. fragment length distribution, bias, etc.). After ther\ + \ first observations the auxiliary model parameters\ + \ will be assumed to have converged and will be fixed.\n" + info: null + example: + - 5000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_pre_aux_model_samples" + description: "The first will have their assignment likelihoods\ + \ and contributions to the transcript abundances computed without applying any\ + \ auxiliary models. The purpose of ignoring the auxiliary models for the first\ + \ observations is to avoid applying these models\ + \ before their parameters have been learned sufficiently well.\n" + info: null + example: + - 5000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--useEM" + description: "Use the traditional EM algorithm for optimization in the batch passes.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--useVBOpt" + description: "Use the Variational Bayesian EM [default]\n" + info: null + direction: "input" + - type: "integer" + name: "--range_factorization_bins" + description: "Factorizes the likelihood used in quantification by adopting a new\ + \ notion of equivalence classes based on the conditional probabilities with\ + \ which fragments are generated from different transcripts. This is a more fine-grained\ + \ factorization than the normal rich equivalence classes. The default value\ + \ (4) corresponds to the default used in Zakeri et al. 2017 (doi: 10.1093/bioinformatics/btx262),\ + \ and larger values imply a more fine-grained factorization. If range factorization\ + \ is enabled, a common value to select for this parameter is 4. A value of 0\ + \ signifies the use of basic rich equivalence classes.\n" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_Gibbs_samples" + description: "Number of Gibbs sampling rounds to perform.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--no_Gamma_draw" + description: "This switch will disable drawing transcript fractions from a Gamma\ + \ distribution during Gibbs sampling. In this case the sampler does not account\ + \ for shot-noise, but only assignment ambiguity\n" + info: null + direction: "input" + - type: "integer" + name: "--num_bootstraps" + description: "Number of bootstrap samples to generate. Note: This is mutually\ + \ exclusive with Gibbs sampling.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bootstrap_reproject" + description: "This switch will learn the parameter distribution from the bootstrapped\ + \ counts for each sample, but will reproject those parameters onto the original\ + \ equivalence class counts.\n" + info: null + direction: "input" + - type: "integer" + name: "--thinning_factor" + description: "Number of steps to discard for every sample kept from the Gibbs\ + \ chain. The larger this number, the less chance that subsequent samples are\ + \ auto-correlated, but the slower sampling becomes.\n" + info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--quiet" + alternatives: + - "-q" + description: "Be quiet while doing quantification (don't write informative output\ + \ to the console unless something goes wrong).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--per_transcript_prior" + description: "The prior (either the default or the argument provided via --vb_prior)\ + \ will be interpreted as a transcript-level prior (i.e. each transcript will\ + \ be given a prior read count of this value)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--per_nucleotide_prior" + description: "The prior (either the default or the argument provided via --vb_prior)\ + \ will be interpreted as a nucleotide-level prior (i.e. each nucleotide will\ + \ be given a prior read count of this value)\n" + info: null + direction: "input" + - type: "integer" + name: "--sig_digits" + description: "The number of significant digits to write when outputting the EffectiveLength\ + \ and NumReads columns\n" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--vb_prior" + description: "The prior that will be used in the VBEM algorithm. This is interpreted\ + \ as a per-transcript prior, unless the --per_nucleotide_prior flag is also\ + \ given. If the --per_nucleotide_prior flag is given, this is used as a nucleotide-level\ + \ prior. If the default is used, it will be divided by 1000 before being used\ + \ as a nucleotide-level prior, i.e. the default per-nucleotide prior will be\ + \ 1e-5.\n" + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--write_orphan_links" + description: "Write the transcripts that are linked by orphaned reads.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--write_unmapped_names" + description: "Write the names of un-mapped reads to the file unmapped_names.txt\ + \ in the auxiliary directory.\n" + info: null + direction: "input" +- name: "Alignment-specific options" + arguments: + - type: "boolean_true" + name: "--no_error_model" + description: "Turn off the alignment error model, which takes into account the\ + \ the observed frequency of different types of mismatches / indels when computing\ + \ the likelihood of a given alignment. Turning this off can speed up alignment-based\ + \ salmon, but can harm quantification accuracy.\n" + info: null + direction: "input" + - type: "integer" + name: "--num_error_bins" + description: "The number of bins into which to divide each read when learning\ + \ and applying the error model. For example, a value of 10 would mean that\ + \ effectively, a separate error model is leared and applied to each 10th of\ + \ the read, while a value of 3 would mean that a separate error model is applied\ + \ to the read beginning (first third), middle (second third) and end (final\ + \ third).\n" + info: null + example: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sample_out" + alternatives: + - "-s" + description: "Write a \"postSample.bam\" file in the output directory that will\ + \ sample the input alignments according to the estimated transcript abundances.\ + \ If you're going to perform downstream analysis of the alignments with tools\ + \ which don't, themselves, take fragment assignment ambiguity into account,\ + \ you should use this output.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--sample_unaligned" + alternatives: + - "-u" + description: "In addition to sampling the aligned reads, also write the un-aligned\ + \ reads to \"postSample.bam\".\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--gencode" + description: "This flag will expect the input transcript fasta to be in GENCODE\ + \ format, and will split the transcript name at the first '|' character. These\ + \ reduced names will be used in the output and when looking for these transcripts\ + \ in a gene to transcript GTF.\n" + info: null + direction: "input" + - type: "integer" + name: "--mapping_cache_memory_limit" + description: "If the file contained fewer than this many mapped reads, then just\ + \ keep the data in memory for subsequent rounds of inference. Obviously, this\ + \ value should not be too large if you wish to keep a low memory usage, but\ + \ setting it large enough to accommodate all of the mapped read can substantially\ + \ speed up inference on \"small\" files that contain only a few million reads.\n" + info: null + example: + - 2000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Salmon is a tool for wicked-fast transcript quantification from RNA-seq\ + \ data. It can either make use of pre-computed alignments (in the form of a SAM/BAM\ + \ file) to the transcripts rather than the raw reads, or can be run in the mapping-based\ + \ mode. \n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "Transcriptome" +- "Quantification" +license: "GPL-3.0" +references: + doi: + - "10.1038/nmeth.4197" +links: + repository: "https://github.com/COMBINE-lab/salmon" + homepage: "https://salmon.readthedocs.io/en/latest/salmon.html" + documentation: "https://salmon.readthedocs.io/en/latest/salmon.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/salmon:1.10.2--hecfa306_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "salmon index -v 2>&1 | sed 's/salmon \\([0-9.]*\\)/salmon: \\1/' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/salmon/salmon_quant/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/salmon/salmon_quant" + executable: "target/nextflow/salmon/salmon_quant/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf new file mode 100644 index 0000000..8a9de54 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf @@ -0,0 +1,5009 @@ +// salmon_quant v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Sai Nirmayi Yasa (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "salmon_quant", + "namespace" : "salmon", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Sai Nirmayi Yasa", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "nirmayi@data-intuitive.com", + "github" : "sainirmayi", + "linkedin" : "sai-nirmayi-yasa" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Junior Bioinformatics Researcher" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Common input options", + "arguments" : [ + { + "type" : "string", + "name" : "--lib_type", + "alternatives" : [ + "-l" + ], + "description" : "Format string describing the library.\nThe library type string consists of three parts: \n1. Relative orientation of the reads: This part is only provided if the library is paired-end, The possible options are\n I = inward\n O = outward\n M = matching\n2. Strandedness of the library: This part specifies whether the protocol is stranded or unstranded. The options are:\n S = stranded\n U = unstranded\n3. Directionality of the reads: If the library is stranded, the final part of the library string is used to specify the strand from which the read originates. The possible values are\n F = read 1 (or single-end read) comes from the forward strand\n R = read 1 (or single-end read) comes from the reverse strand\n", + "default" : [ + "A" + ], + "required" : false, + "choices" : [ + "A", + "U", + "SF", + "SR", + "IU", + "IS", + "ISF", + "ISR", + "OU", + "OS", + "OSF", + "OSR", + "MU", + "MS", + "MSF", + "MSR" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Mapping input options", + "arguments" : [ + { + "type" : "file", + "name" : "--index", + "alternatives" : [ + "-i" + ], + "description" : "Salmon index.\n", + "example" : [ + "transcriptome_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unmated_reads", + "alternatives" : [ + "-r" + ], + "description" : "List of files containing unmated reads of (e.g. single-end reads).\n", + "example" : [ + "sample.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--mates1", + "alternatives" : [ + "-m1" + ], + "description" : "File containing the #1 mates.\n", + "example" : [ + "sample_1.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--mates2", + "alternatives" : [ + "-m2" + ], + "description" : "File containing the #2 mates.\n", + "example" : [ + "sample_2.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Alignment input options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--discard_orphans", + "description" : "Discard orphan alignments in the input [for alignment-based mode only]. If this flag is passed, then only paired alignments will be considered toward quantification estimates. The default behavior is to consider orphan alignments if no valid paired mappings exist.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--alignments", + "alternatives" : [ + "-a" + ], + "description" : "Input alignment (BAM) file(s).\n", + "example" : [ + "sample.fq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--eqclasses", + "alternatives" : [ + "-e" + ], + "description" : "input salmon weighted equivalence class file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--targets", + "alternatives" : [ + "-t" + ], + "description" : "FASTA format file containing target transcripts.\n", + "example" : [ + "transcripts.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--ont", + "description" : "Use alignment model for Oxford Nanopore long reads\n", + "direction" : "input" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output quantification directory.\n", + "example" : [ + "quant_output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_results", + "description" : "Salmon quantification file.\n", + "example" : [ + "quant.sf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Basic options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--seq_bias", + "description" : "Perform sequence-specific bias correction.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--gc_bias", + "description" : "Perform fragment GC bias correction [beta for single-end reads].\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--pos_bias", + "description" : "Perform positional bias correction.\n", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--incompat_prior", + "description" : "Set the prior probability that an alignment that disagrees with the specified library type (--lib_type) results from the true fragment origin. Setting this to 0 specifies that alignments that disagree with the library type should be \\"impossible\\", while setting it to 1 says that alignments that disagree with the library type are no less likely than those that do.\n", + "example" : [ + 0.0 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gene_map", + "alternatives" : [ + "-g" + ], + "description" : "File containing a mapping of transcripts to genes. If this file is provided salmon will output both quant.sf and quant.genes.sf files, where the latter contains aggregated gene-level abundance estimates. The transcript to gene mapping should be provided as either a GTF file, or a in a simple tab-delimited format where each line contains the name of a transcript and the gene to which it belongs separated by a tab. The extension of the file is used to determine how the file should be parsed. Files ending in '.gtf', '.gff' or '.gff3' are assumed to be in GTF format; files with any other extension are assumed to be in the simple format. In GTF / GFF format, the \\"transcript_id\\" is assumed to contain the transcript identifier and the \\"gene_id\\" is assumed to contain the corresponding gene identifier.\n", + "example" : [ + "gene_map.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--aux_target_file", + "description" : "A file containing a list of \\"auxiliary\\" targets. These are valid targets (i.e., not decoys) to which fragments are allowed to map and be assigned, and which will be quantified, but for which auxiliary models like sequence-specific and fragment-GC bias correction should not be applied.\n", + "example" : [ + "auxilary_targets.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--meta", + "description" : "If you're using Salmon on a metagenomic dataset, consider setting this flag to disable parts of the abundance estimation model that make less sense for metagenomic data.\n", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--score_exp", + "description" : "The factor by which sub-optimal alignment scores are downweighted to produce a probability. If the best alignment score for the current read is S, and the score for a particular alignment is w, then the probability will be computed porportional to exp( - scoreExp * (S-w) ).\n", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options specific to mapping mode", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--discard_orphans_quasi", + "description" : "[selective-alignment mode only] \nDiscard orphan mappings in selective-alignment mode. If this flag is passed then only paired mappings will be considered toward quantification estimates. The default behavior is to consider orphan mappings if no valid paired mappings exist. This flag is independent of the option to write the orphaned mappings to file (--writeOrphanLinks).\n", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--consensus_slack", + "description" : "[selective-alignment mode only] \nThe amount of slack allowed in the selective-alignment filtering mechanism. If this is set to a fraction, X, greater than 0 (and in [0,1)), then uniMEM chains with scores below (100 * X)% of the best chain score for a read, and read pairs with a sum of chain scores below (100 * X)% of the best chain score for a read pair will be discounted as a mapping candidates. The default value of this option is 0.35.\n", + "example" : [ + 0.35 + ], + "required" : false, + "min" : 0.0, + "max" : 0.999999999, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--pre_merge_chain_sub_thresh", + "description" : "[selective-alignment mode only] \nThe threshold of sub-optimal chains, compared to the best chain on a given target, that will be retained and passed to the next phase of mapping. Specifically, if the best chain for a read (or read-end in paired-end mode) to target t has score X_t, then all chains for this read with score >= X_t * preMergeChainSubThresh will be retained and passed to subsequent mapping phases. This value must be in the range [0, 1].\n", + "example" : [ + 0.75 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--post_merge_chain_sub_thresh", + "description" : "[selective-alignment mode only] \nThe threshold of sub-optimal chains, compared to the best chain on a given target, that will be retained and passed to the next phase of mapping. This is different than post_merge_chain_sub_thresh, because this is applied to pairs of chains (from the ends of paired-end reads) after merging (i.e. after checking concordancy constraints etc.). Specifically, if the best chain pair to target t has score X_t, then all chain pairs for this read pair with score >= X_t * post_merge_chain_sub_thresh will be retained and passed to subsequent mapping phases. This value must be in the range [0, 1]. Note: This option is only meaningful for paired-end libraries, and is ignored for single-end libraries.\n", + "example" : [ + 0.9 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--orphan_chain_sub_thresh", + "description" : "[selective-alignment mode only]\nThis threshold sets a global sub-optimality threshold for chains corresponding to orphan mappings. That is, if the merging procedure results in no concordant mappings then only orphan mappings with a chain score >= orphan_chain_sub_thresh * bestChainScore will be retained and passed to subsequent mapping phases. This value must be in the range [0, 1]. Note: This option is only meaningful for paired-end libraries, and is ignored for single-end libraries.\n", + "example" : [ + 0.95 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--min_score_fraction", + "description" : "[selective-alignment mode only]\nThe fraction of the optimal possible alignment score that a mapping must achieve in order to be considered \\"valid\\" --- should be in (0,1]. Default 0.65\n", + "example" : [ + 0.65 + ], + "required" : false, + "min" : 1.0E-9, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mismatch_seed_skip", + "description" : "[selective-alignment mode only]\nAfter a k-mer hit is extended to a uni-MEM, the uni-MEM extension can terminate for one of 3 reasons; the end of the read, the end of the unitig, or a mismatch. If the extension ends because of a mismatch, this is likely the result of a sequencing error. To avoid looking up many k-mers that will likely fail to be located in the index, the search procedure skips by a factor of mismatch_seed_skip until it either (1) finds another match or (2) is k-bases past the mismatch position. This value controls that skip length. A smaller value can increase sensitivity, while a larger value can speed up seeding.\n", + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--disable_chaining_heuristic", + "description" : "[selective-alignment mode only] \nBy default, the heuristic of (Li 2018) is implemented, which terminates the chaining DP once a given number of valid backpointers are found. This speeds up the seed (MEM) chaining step, but may result in sub-optimal chains in complex situations (e.g. sequences with many repeats and overlapping repeats). Passing this flag will disable the chaining heuristic, and perform the full chaining dynamic program, guaranteeing the optimal chain is found in this step.\n", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--decoy_threshold", + "description" : "[selective-alignment mode only]\nFor an alignemnt to an annotated transcript to be considered invalid, it must have an alignment score < (decoy_threshold * bestDecoyScore). A value of 1.0 means that any alignment strictly worse than the best decoy alignment will be discarded. A smaller value will allow reads to be allocated to transcripts even if they strictly align better to the decoy sequence.\n", + "example" : [ + 1.0 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--ma", + "description" : "[selective-alignment mode only]\nThe value given to a match between read and reference nucleotides in an alignment.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mp", + "description" : "[selective-alignment mode only]\nThe value given to a mis-match between read and reference nucleotides in an alignment.\n", + "example" : [ + -4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--go", + "description" : "[selective-alignment mode only]\nThe value given to a gap opening in an alignment.\n", + "example" : [ + 6 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--ge", + "description" : "[selective-alignment mode only]\nThe value given to a gap extension in an alignment.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bandwidth", + "description" : "[selective-alignment mode only]\nThe value used for the bandwidth passed to ksw2. A smaller bandwidth can make the alignment verification run more quickly, but could possibly miss valid alignments.\n", + "example" : [ + 15 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--allow_dovetail", + "description" : "[selective-alignment mode only] \nAllow dovetailing mappings.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--recover_orphans", + "description" : "[selective-alignment mode only] \nAttempt to recover the mates of orphaned reads. This uses edlib for orphan recovery, and so introduces some computational overhead, but it can improve sensitivity.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--mimicBT2", + "description" : "[selective-alignment mode only] \nSet flags to mimic parameters similar to Bowtie2 with --no-discordant and --no-mixed flags. This increases disallows dovetailing reads, and discards orphans. Note, this does not impose the very strict parameters assumed by RSEM+Bowtie2, like gapless alignments. For that behavior, use the --mimic_strictBT2 flag below.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--mimic_strictBT2", + "description" : "[selective-alignment mode only] \nSet flags to mimic the very strict parameters used by RSEM+Bowtie2. This increases --min_score_fraction to 0.8, disallows dovetailing reads, discards orphans, and disallows gaps in alignments.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--softclip", + "description" : "[selective-alignment mode only] \nAllos soft-clipping of reads during selective-alignment. If this option is provided, then regions at the beginning or end of the read can be withheld from alignment without any effect on the resulting score (i.e. neither adding nor removing from the score). This will drastically reduce the penalty if there are mismatches at the beginning or end of the read due to e.g. low-quality bases or adapters. NOTE: Even with soft-clipping enabled, the read must still achieve a score of at least min_score_fraction * maximum achievable score, where the maximum achievable score is computed based on the full (un-clipped) read length.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--softclip_overhangs", + "description" : "[selective-alignment mode only] \nAllow soft-clipping of reads that overhang the beginning or ends of the transcript. In this case, the overhaning section of the read will simply be unaligned, and will not contribute or detract from the alignment score. The default policy is to force an end-to-end alignment of the entire read, so that overhanings will result in some deletion of nucleotides from the read.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--full_length_alignment", + "description" : "[selective-alignment mode only] \nPerform selective alignment over the full length of the read, beginning from the (approximate) initial mapping location and using extension alignment. This is in contrast with the default behavior which is to only perform alignment between the MEMs in the optimal chain (and before the first and after the last MEM if applicable). The default strategy forces the MEMs to belong to the alignment, but has the benefit that it can discover indels prior to the first hit shared between the read and reference. Except in very rare circumstances, the default mode should be more accurate.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--hard_filter", + "description" : "[selective-alignment mode only] \nInstead of weighting mappings by their alignment score, this flag will discard any mappings with sub-optimal alignment score. The default option of soft-filtering (i.e. weighting mappings by their alignment score) usually yields slightly more accurate abundance estimates but this flag may be desirable if you want more accurate 'naive' equivalence classes, rather than range factorized equivalence classes.\n", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--min_aln_prob", + "description" : "The minimum number of fragments that must be assigned to the transcriptome for quantification to proceed.\n", + "example" : [ + 1.0E-5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--write_mappings", + "alternatives" : [ + "-z" + ], + "description" : "If this option is provided, then the selective-alignment results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--mapping_sam", + "description" : "Path to file that should output the selective-alignment results in SAM-compatible format. This option must be provided while using --write_mappings", + "example" : [ + "mappings.sam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--write_qualities", + "description" : "This flag only has meaning if mappings are being written (with --write_mappings/-z). If this flag is provided, then the output SAM file will contain quality strings as well as read sequences. Note that this can greatly increase the size of the output file.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--hit_filter_policy", + "description" : "[selective-alignment mode only]\nDetermines the policy by which hits are filtered in selective alignment. Filtering hits after chaining (the default) is more sensitive, but more computationally intensive, because it performs the chaining dynamic program for all hits. Filtering before chaining is faster, but some true hits may be missed. The options are BEFORE, AFTER, BOTH and NONE.\n", + "example" : [ + "AFTER" + ], + "required" : false, + "choices" : [ + "BEFORE", + "AFTER", + "BOTH", + "NONE" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Advance options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--alternative_init_mode", + "description" : "Use an alternative strategy (rather than simple interpolation between) the online and uniform abundance estimates to initialize the EM / VBEM algorithm.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--aux_dir", + "description" : "The sub-directory of the quantification directory where auxiliary information e.g. bootstraps, bias parameters, etc. will be written.\n", + "example" : [ + "aux_info" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--skip_quant", + "description" : "Skip performing the actual transcript quantification (including any Gibbs sampling or bootstrapping).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--dump_eq", + "description" : "Dump the simple equivalence class counts that were computed during mapping or alignment.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--dump_eq_weights", + "alternatives" : [ + "-d" + ], + "description" : "Dump conditional probabilities associated with transcripts when equivalence class information is being dumped to file. Note, this will dump the factorization that is actually used by salmon's offline phase for inference. If you are using range-factorized equivalence classes (the default) then the same transcript set may appear multiple times with different associated conditional probabilities.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_assigned_frags", + "description" : "The minimum number of fragments that must be assigned to the transcriptome for quantification to proceed.\n", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--reduce_GC_memory", + "description" : "If this option is selected, a more memory efficient (but slightly slower) representation is used to compute fragment GC content. Enabling this will reduce memory usage, but can also reduce speed. However, the results themselves will remain the same.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--bias_speed_samp", + "description" : "The value at which the fragment length PMF is down-sampled when evaluating sequence-specific & GC fragment bias. Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results.\n", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fld_max", + "description" : "The maximum fragment length to consider when building the empirical distribution\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fld_mean", + "description" : "The mean used in the fragment length distribution prior\n", + "example" : [ + 250 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fld_SD", + "description" : "The standard deviation used in the fragment length distribution prior\n", + "example" : [ + 25 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--forgetting_factor", + "alternatives" : [ + "-f" + ], + "description" : "The forgetting factor used in the online learning schedule. A smallervalue results in quicker learning, but higher variance and may be unstable. A larger value results in slower learning but may be more stable. Value should be in the interval (0.5, 1.0].\n", + "example" : [ + 0.65 + ], + "required" : false, + "min" : 0.500000001, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--init_uniform", + "description" : "Initialize the offline inference with uniform parameters, rather than seeding with online parameters.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--max_occs_per_hit", + "description" : "When collecting \\"hits\\" (MEMs), hits having more than max_occs_per_hit occurrences won't be considered.\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_read_occ", + "description" : "Reads \\"mapping\\" to more than this many places won't be considered.\n", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--no_length_correction", + "description" : "Entirely disables length correction when estimating the abundance of transcripts. This option can be used with protocols where one expects that fragments derive from their underlying targets without regard to that target's length (e.g. QuantSeq)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_effective_length_correction", + "description" : "Disables effective length correction when computing the probability that a fragment was generated from a transcript. If this flag is passed in,the fragment length distribution is not taken into account when computing this probability.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_single_frag_prob", + "description" : "Disables the estimation of an associated fragment length probability for single-end reads or for orphaned mappings in paired-end libraries. The default behavior is to consider the probability of all possible fragment lengths associated with the retained mapping. Enabling this flag (i.e. turning this default behavior off) will simply not attempt to estimate a fragment length probability in such cases.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_frag_length_dist", + "description" : "Don't consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location. Normally, Fragments with unlikely lengths will be assigned a smaller relative probability than those with more likely lengths. When this flag is passed in, the observed fragment length has no effect on that fragment's a priori probability.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_bias_length_threshold", + "description" : "If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths. This can increase the precision of bias correction, but harm robustness. The default correction applies a threshold.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_bias_samples", + "description" : "Number of fragment mappings to use when learning the sequence-specific bias model.\n", + "example" : [ + 2000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_aux_model_samples", + "description" : "The first are used to train the auxiliary model parameters (e.g. fragment length distribution, bias, etc.). After ther first observations the auxiliary model parameters will be assumed to have converged and will be fixed.\n", + "example" : [ + 5000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_pre_aux_model_samples", + "description" : "The first will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models. The purpose of ignoring the auxiliary models for the first observations is to avoid applying these models before their parameters have been learned sufficiently well.\n", + "example" : [ + 5000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--useEM", + "description" : "Use the traditional EM algorithm for optimization in the batch passes.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--useVBOpt", + "description" : "Use the Variational Bayesian EM [default]\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--range_factorization_bins", + "description" : "Factorizes the likelihood used in quantification by adopting a new notion of equivalence classes based on the conditional probabilities with which fragments are generated from different transcripts. This is a more fine-grained factorization than the normal rich equivalence classes. The default value (4) corresponds to the default used in Zakeri et al. 2017 (doi: 10.1093/bioinformatics/btx262), and larger values imply a more fine-grained factorization. If range factorization is enabled, a common value to select for this parameter is 4. A value of 0 signifies the use of basic rich equivalence classes.\n", + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_Gibbs_samples", + "description" : "Number of Gibbs sampling rounds to perform.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--no_Gamma_draw", + "description" : "This switch will disable drawing transcript fractions from a Gamma distribution during Gibbs sampling. In this case the sampler does not account for shot-noise, but only assignment ambiguity\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_bootstraps", + "description" : "Number of bootstrap samples to generate. Note: This is mutually exclusive with Gibbs sampling.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--bootstrap_reproject", + "description" : "This switch will learn the parameter distribution from the bootstrapped counts for each sample, but will reproject those parameters onto the original equivalence class counts.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--thinning_factor", + "description" : "Number of steps to discard for every sample kept from the Gibbs chain. The larger this number, the less chance that subsequent samples are auto-correlated, but the slower sampling becomes.\n", + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "alternatives" : [ + "-q" + ], + "description" : "Be quiet while doing quantification (don't write informative output to the console unless something goes wrong).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--per_transcript_prior", + "description" : "The prior (either the default or the argument provided via --vb_prior) will be interpreted as a transcript-level prior (i.e. each transcript will be given a prior read count of this value)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--per_nucleotide_prior", + "description" : "The prior (either the default or the argument provided via --vb_prior) will be interpreted as a nucleotide-level prior (i.e. each nucleotide will be given a prior read count of this value)\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--sig_digits", + "description" : "The number of significant digits to write when outputting the EffectiveLength and NumReads columns\n", + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--vb_prior", + "description" : "The prior that will be used in the VBEM algorithm. This is interpreted as a per-transcript prior, unless the --per_nucleotide_prior flag is also given. If the --per_nucleotide_prior flag is given, this is used as a nucleotide-level prior. If the default is used, it will be divided by 1000 before being used as a nucleotide-level prior, i.e. the default per-nucleotide prior will be 1e-5.\n", + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--write_orphan_links", + "description" : "Write the transcripts that are linked by orphaned reads.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--write_unmapped_names", + "description" : "Write the names of un-mapped reads to the file unmapped_names.txt in the auxiliary directory.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Alignment-specific options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--no_error_model", + "description" : "Turn off the alignment error model, which takes into account the the observed frequency of different types of mismatches / indels when computing the likelihood of a given alignment. Turning this off can speed up alignment-based salmon, but can harm quantification accuracy.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_error_bins", + "description" : "The number of bins into which to divide each read when learning and applying the error model. For example, a value of 10 would mean that effectively, a separate error model is leared and applied to each 10th of the read, while a value of 3 would mean that a separate error model is applied to the read beginning (first third), middle (second third) and end (final third).\n", + "example" : [ + 6 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sample_out", + "alternatives" : [ + "-s" + ], + "description" : "Write a \\"postSample.bam\\" file in the output directory that will sample the input alignments according to the estimated transcript abundances. If you're going to perform downstream analysis of the alignments with tools which don't, themselves, take fragment assignment ambiguity into account, you should use this output.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sample_unaligned", + "alternatives" : [ + "-u" + ], + "description" : "In addition to sampling the aligned reads, also write the un-aligned reads to \\"postSample.bam\\".\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--gencode", + "description" : "This flag will expect the input transcript fasta to be in GENCODE format, and will split the transcript name at the first '|' character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--mapping_cache_memory_limit", + "description" : "If the file contained fewer than this many mapped reads, then just keep the data in memory for subsequent rounds of inference. Obviously, this value should not be too large if you wish to keep a low memory usage, but setting it large enough to accommodate all of the mapped read can substantially speed up inference on \\"small\\" files that contain only a few million reads.\n", + "example" : [ + 2000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It can either make use of pre-computed alignments (in the form of a SAM/BAM file) to the transcripts rather than the raw reads, or can be run in the mapping-based mode. \n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Transcriptome", + "Quantification" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1038/nmeth.4197" + ] + }, + "links" : { + "repository" : "https://github.com/COMBINE-lab/salmon", + "homepage" : "https://salmon.readthedocs.io/en/latest/salmon.html", + "documentation" : "https://salmon.readthedocs.io/en/latest/salmon.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/salmon:1.10.2--hecfa306_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "salmon index -v 2>&1 | sed 's/salmon \\\\([0-9.]*\\\\)/salmon: \\\\1/' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/salmon/salmon_quant/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/salmon/salmon_quant", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -e + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_LIB_TYPE+x} ]; then echo "${VIASH_PAR_LIB_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_lib_type='&'#" ; else echo "# par_lib_type="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_UNMATED_READS+x} ]; then echo "${VIASH_PAR_UNMATED_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unmated_reads='&'#" ; else echo "# par_unmated_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_MATES1+x} ]; then echo "${VIASH_PAR_MATES1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mates1='&'#" ; else echo "# par_mates1="; fi ) +$( if [ ! -z ${VIASH_PAR_MATES2+x} ]; then echo "${VIASH_PAR_MATES2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mates2='&'#" ; else echo "# par_mates2="; fi ) +$( if [ ! -z ${VIASH_PAR_DISCARD_ORPHANS+x} ]; then echo "${VIASH_PAR_DISCARD_ORPHANS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_discard_orphans='&'#" ; else echo "# par_discard_orphans="; fi ) +$( if [ ! -z ${VIASH_PAR_ALIGNMENTS+x} ]; then echo "${VIASH_PAR_ALIGNMENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_alignments='&'#" ; else echo "# par_alignments="; fi ) +$( if [ ! -z ${VIASH_PAR_EQCLASSES+x} ]; then echo "${VIASH_PAR_EQCLASSES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_eqclasses='&'#" ; else echo "# par_eqclasses="; fi ) +$( if [ ! -z ${VIASH_PAR_TARGETS+x} ]; then echo "${VIASH_PAR_TARGETS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_targets='&'#" ; else echo "# par_targets="; fi ) +$( if [ ! -z ${VIASH_PAR_ONT+x} ]; then echo "${VIASH_PAR_ONT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ont='&'#" ; else echo "# par_ont="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_results='&'#" ; else echo "# par_quant_results="; fi ) +$( if [ ! -z ${VIASH_PAR_SEQ_BIAS+x} ]; then echo "${VIASH_PAR_SEQ_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seq_bias='&'#" ; else echo "# par_seq_bias="; fi ) +$( if [ ! -z ${VIASH_PAR_GC_BIAS+x} ]; then echo "${VIASH_PAR_GC_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gc_bias='&'#" ; else echo "# par_gc_bias="; fi ) +$( if [ ! -z ${VIASH_PAR_POS_BIAS+x} ]; then echo "${VIASH_PAR_POS_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pos_bias='&'#" ; else echo "# par_pos_bias="; fi ) +$( if [ ! -z ${VIASH_PAR_INCOMPAT_PRIOR+x} ]; then echo "${VIASH_PAR_INCOMPAT_PRIOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_incompat_prior='&'#" ; else echo "# par_incompat_prior="; fi ) +$( if [ ! -z ${VIASH_PAR_GENE_MAP+x} ]; then echo "${VIASH_PAR_GENE_MAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gene_map='&'#" ; else echo "# par_gene_map="; fi ) +$( if [ ! -z ${VIASH_PAR_AUX_TARGET_FILE+x} ]; then echo "${VIASH_PAR_AUX_TARGET_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aux_target_file='&'#" ; else echo "# par_aux_target_file="; fi ) +$( if [ ! -z ${VIASH_PAR_META+x} ]; then echo "${VIASH_PAR_META}" | sed "s#'#'\\"'\\"'#g;s#.*#par_meta='&'#" ; else echo "# par_meta="; fi ) +$( if [ ! -z ${VIASH_PAR_SCORE_EXP+x} ]; then echo "${VIASH_PAR_SCORE_EXP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_score_exp='&'#" ; else echo "# par_score_exp="; fi ) +$( if [ ! -z ${VIASH_PAR_DISCARD_ORPHANS_QUASI+x} ]; then echo "${VIASH_PAR_DISCARD_ORPHANS_QUASI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_discard_orphans_quasi='&'#" ; else echo "# par_discard_orphans_quasi="; fi ) +$( if [ ! -z ${VIASH_PAR_CONSENSUS_SLACK+x} ]; then echo "${VIASH_PAR_CONSENSUS_SLACK}" | sed "s#'#'\\"'\\"'#g;s#.*#par_consensus_slack='&'#" ; else echo "# par_consensus_slack="; fi ) +$( if [ ! -z ${VIASH_PAR_PRE_MERGE_CHAIN_SUB_THRESH+x} ]; then echo "${VIASH_PAR_PRE_MERGE_CHAIN_SUB_THRESH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pre_merge_chain_sub_thresh='&'#" ; else echo "# par_pre_merge_chain_sub_thresh="; fi ) +$( if [ ! -z ${VIASH_PAR_POST_MERGE_CHAIN_SUB_THRESH+x} ]; then echo "${VIASH_PAR_POST_MERGE_CHAIN_SUB_THRESH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_post_merge_chain_sub_thresh='&'#" ; else echo "# par_post_merge_chain_sub_thresh="; fi ) +$( if [ ! -z ${VIASH_PAR_ORPHAN_CHAIN_SUB_THRESH+x} ]; then echo "${VIASH_PAR_ORPHAN_CHAIN_SUB_THRESH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_orphan_chain_sub_thresh='&'#" ; else echo "# par_orphan_chain_sub_thresh="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_SCORE_FRACTION+x} ]; then echo "${VIASH_PAR_MIN_SCORE_FRACTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_score_fraction='&'#" ; else echo "# par_min_score_fraction="; fi ) +$( if [ ! -z ${VIASH_PAR_MISMATCH_SEED_SKIP+x} ]; then echo "${VIASH_PAR_MISMATCH_SEED_SKIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mismatch_seed_skip='&'#" ; else echo "# par_mismatch_seed_skip="; fi ) +$( if [ ! -z ${VIASH_PAR_DISABLE_CHAINING_HEURISTIC+x} ]; then echo "${VIASH_PAR_DISABLE_CHAINING_HEURISTIC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_disable_chaining_heuristic='&'#" ; else echo "# par_disable_chaining_heuristic="; fi ) +$( if [ ! -z ${VIASH_PAR_DECOY_THRESHOLD+x} ]; then echo "${VIASH_PAR_DECOY_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_decoy_threshold='&'#" ; else echo "# par_decoy_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_MA+x} ]; then echo "${VIASH_PAR_MA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ma='&'#" ; else echo "# par_ma="; fi ) +$( if [ ! -z ${VIASH_PAR_MP+x} ]; then echo "${VIASH_PAR_MP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mp='&'#" ; else echo "# par_mp="; fi ) +$( if [ ! -z ${VIASH_PAR_GO+x} ]; then echo "${VIASH_PAR_GO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_go='&'#" ; else echo "# par_go="; fi ) +$( if [ ! -z ${VIASH_PAR_GE+x} ]; then echo "${VIASH_PAR_GE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ge='&'#" ; else echo "# par_ge="; fi ) +$( if [ ! -z ${VIASH_PAR_BANDWIDTH+x} ]; then echo "${VIASH_PAR_BANDWIDTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bandwidth='&'#" ; else echo "# par_bandwidth="; fi ) +$( if [ ! -z ${VIASH_PAR_ALLOW_DOVETAIL+x} ]; then echo "${VIASH_PAR_ALLOW_DOVETAIL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_allow_dovetail='&'#" ; else echo "# par_allow_dovetail="; fi ) +$( if [ ! -z ${VIASH_PAR_RECOVER_ORPHANS+x} ]; then echo "${VIASH_PAR_RECOVER_ORPHANS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_recover_orphans='&'#" ; else echo "# par_recover_orphans="; fi ) +$( if [ ! -z ${VIASH_PAR_MIMICBT2+x} ]; then echo "${VIASH_PAR_MIMICBT2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mimicBT2='&'#" ; else echo "# par_mimicBT2="; fi ) +$( if [ ! -z ${VIASH_PAR_MIMIC_STRICTBT2+x} ]; then echo "${VIASH_PAR_MIMIC_STRICTBT2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mimic_strictBT2='&'#" ; else echo "# par_mimic_strictBT2="; fi ) +$( if [ ! -z ${VIASH_PAR_SOFTCLIP+x} ]; then echo "${VIASH_PAR_SOFTCLIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_softclip='&'#" ; else echo "# par_softclip="; fi ) +$( if [ ! -z ${VIASH_PAR_SOFTCLIP_OVERHANGS+x} ]; then echo "${VIASH_PAR_SOFTCLIP_OVERHANGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_softclip_overhangs='&'#" ; else echo "# par_softclip_overhangs="; fi ) +$( if [ ! -z ${VIASH_PAR_FULL_LENGTH_ALIGNMENT+x} ]; then echo "${VIASH_PAR_FULL_LENGTH_ALIGNMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_full_length_alignment='&'#" ; else echo "# par_full_length_alignment="; fi ) +$( if [ ! -z ${VIASH_PAR_HARD_FILTER+x} ]; then echo "${VIASH_PAR_HARD_FILTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hard_filter='&'#" ; else echo "# par_hard_filter="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_ALN_PROB+x} ]; then echo "${VIASH_PAR_MIN_ALN_PROB}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_aln_prob='&'#" ; else echo "# par_min_aln_prob="; fi ) +$( if [ ! -z ${VIASH_PAR_WRITE_MAPPINGS+x} ]; then echo "${VIASH_PAR_WRITE_MAPPINGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_write_mappings='&'#" ; else echo "# par_write_mappings="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPPING_SAM+x} ]; then echo "${VIASH_PAR_MAPPING_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapping_sam='&'#" ; else echo "# par_mapping_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_WRITE_QUALITIES+x} ]; then echo "${VIASH_PAR_WRITE_QUALITIES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_write_qualities='&'#" ; else echo "# par_write_qualities="; fi ) +$( if [ ! -z ${VIASH_PAR_HIT_FILTER_POLICY+x} ]; then echo "${VIASH_PAR_HIT_FILTER_POLICY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hit_filter_policy='&'#" ; else echo "# par_hit_filter_policy="; fi ) +$( if [ ! -z ${VIASH_PAR_ALTERNATIVE_INIT_MODE+x} ]; then echo "${VIASH_PAR_ALTERNATIVE_INIT_MODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_alternative_init_mode='&'#" ; else echo "# par_alternative_init_mode="; fi ) +$( if [ ! -z ${VIASH_PAR_AUX_DIR+x} ]; then echo "${VIASH_PAR_AUX_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aux_dir='&'#" ; else echo "# par_aux_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_SKIP_QUANT+x} ]; then echo "${VIASH_PAR_SKIP_QUANT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_skip_quant='&'#" ; else echo "# par_skip_quant="; fi ) +$( if [ ! -z ${VIASH_PAR_DUMP_EQ+x} ]; then echo "${VIASH_PAR_DUMP_EQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dump_eq='&'#" ; else echo "# par_dump_eq="; fi ) +$( if [ ! -z ${VIASH_PAR_DUMP_EQ_WEIGHTS+x} ]; then echo "${VIASH_PAR_DUMP_EQ_WEIGHTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dump_eq_weights='&'#" ; else echo "# par_dump_eq_weights="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_ASSIGNED_FRAGS+x} ]; then echo "${VIASH_PAR_MIN_ASSIGNED_FRAGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_assigned_frags='&'#" ; else echo "# par_min_assigned_frags="; fi ) +$( if [ ! -z ${VIASH_PAR_REDUCE_GC_MEMORY+x} ]; then echo "${VIASH_PAR_REDUCE_GC_MEMORY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reduce_GC_memory='&'#" ; else echo "# par_reduce_GC_memory="; fi ) +$( if [ ! -z ${VIASH_PAR_BIAS_SPEED_SAMP+x} ]; then echo "${VIASH_PAR_BIAS_SPEED_SAMP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bias_speed_samp='&'#" ; else echo "# par_bias_speed_samp="; fi ) +$( if [ ! -z ${VIASH_PAR_FLD_MAX+x} ]; then echo "${VIASH_PAR_FLD_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fld_max='&'#" ; else echo "# par_fld_max="; fi ) +$( if [ ! -z ${VIASH_PAR_FLD_MEAN+x} ]; then echo "${VIASH_PAR_FLD_MEAN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fld_mean='&'#" ; else echo "# par_fld_mean="; fi ) +$( if [ ! -z ${VIASH_PAR_FLD_SD+x} ]; then echo "${VIASH_PAR_FLD_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fld_SD='&'#" ; else echo "# par_fld_SD="; fi ) +$( if [ ! -z ${VIASH_PAR_FORGETTING_FACTOR+x} ]; then echo "${VIASH_PAR_FORGETTING_FACTOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_forgetting_factor='&'#" ; else echo "# par_forgetting_factor="; fi ) +$( if [ ! -z ${VIASH_PAR_INIT_UNIFORM+x} ]; then echo "${VIASH_PAR_INIT_UNIFORM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_init_uniform='&'#" ; else echo "# par_init_uniform="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_OCCS_PER_HIT+x} ]; then echo "${VIASH_PAR_MAX_OCCS_PER_HIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_occs_per_hit='&'#" ; else echo "# par_max_occs_per_hit="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_READ_OCC+x} ]; then echo "${VIASH_PAR_MAX_READ_OCC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_read_occ='&'#" ; else echo "# par_max_read_occ="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_LENGTH_CORRECTION+x} ]; then echo "${VIASH_PAR_NO_LENGTH_CORRECTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_length_correction='&'#" ; else echo "# par_no_length_correction="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_EFFECTIVE_LENGTH_CORRECTION+x} ]; then echo "${VIASH_PAR_NO_EFFECTIVE_LENGTH_CORRECTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_effective_length_correction='&'#" ; else echo "# par_no_effective_length_correction="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_SINGLE_FRAG_PROB+x} ]; then echo "${VIASH_PAR_NO_SINGLE_FRAG_PROB}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_single_frag_prob='&'#" ; else echo "# par_no_single_frag_prob="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_FRAG_LENGTH_DIST+x} ]; then echo "${VIASH_PAR_NO_FRAG_LENGTH_DIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_frag_length_dist='&'#" ; else echo "# par_no_frag_length_dist="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_BIAS_LENGTH_THRESHOLD+x} ]; then echo "${VIASH_PAR_NO_BIAS_LENGTH_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_bias_length_threshold='&'#" ; else echo "# par_no_bias_length_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_BIAS_SAMPLES+x} ]; then echo "${VIASH_PAR_NUM_BIAS_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_bias_samples='&'#" ; else echo "# par_num_bias_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_AUX_MODEL_SAMPLES+x} ]; then echo "${VIASH_PAR_NUM_AUX_MODEL_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_aux_model_samples='&'#" ; else echo "# par_num_aux_model_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_PRE_AUX_MODEL_SAMPLES+x} ]; then echo "${VIASH_PAR_NUM_PRE_AUX_MODEL_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_pre_aux_model_samples='&'#" ; else echo "# par_num_pre_aux_model_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_USEEM+x} ]; then echo "${VIASH_PAR_USEEM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_useEM='&'#" ; else echo "# par_useEM="; fi ) +$( if [ ! -z ${VIASH_PAR_USEVBOPT+x} ]; then echo "${VIASH_PAR_USEVBOPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_useVBOpt='&'#" ; else echo "# par_useVBOpt="; fi ) +$( if [ ! -z ${VIASH_PAR_RANGE_FACTORIZATION_BINS+x} ]; then echo "${VIASH_PAR_RANGE_FACTORIZATION_BINS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_range_factorization_bins='&'#" ; else echo "# par_range_factorization_bins="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_GIBBS_SAMPLES+x} ]; then echo "${VIASH_PAR_NUM_GIBBS_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_Gibbs_samples='&'#" ; else echo "# par_num_Gibbs_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_GAMMA_DRAW+x} ]; then echo "${VIASH_PAR_NO_GAMMA_DRAW}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_Gamma_draw='&'#" ; else echo "# par_no_Gamma_draw="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_BOOTSTRAPS+x} ]; then echo "${VIASH_PAR_NUM_BOOTSTRAPS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_bootstraps='&'#" ; else echo "# par_num_bootstraps="; fi ) +$( if [ ! -z ${VIASH_PAR_BOOTSTRAP_REPROJECT+x} ]; then echo "${VIASH_PAR_BOOTSTRAP_REPROJECT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bootstrap_reproject='&'#" ; else echo "# par_bootstrap_reproject="; fi ) +$( if [ ! -z ${VIASH_PAR_THINNING_FACTOR+x} ]; then echo "${VIASH_PAR_THINNING_FACTOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_thinning_factor='&'#" ; else echo "# par_thinning_factor="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_PER_TRANSCRIPT_PRIOR+x} ]; then echo "${VIASH_PAR_PER_TRANSCRIPT_PRIOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_transcript_prior='&'#" ; else echo "# par_per_transcript_prior="; fi ) +$( if [ ! -z ${VIASH_PAR_PER_NUCLEOTIDE_PRIOR+x} ]; then echo "${VIASH_PAR_PER_NUCLEOTIDE_PRIOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_nucleotide_prior='&'#" ; else echo "# par_per_nucleotide_prior="; fi ) +$( if [ ! -z ${VIASH_PAR_SIG_DIGITS+x} ]; then echo "${VIASH_PAR_SIG_DIGITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sig_digits='&'#" ; else echo "# par_sig_digits="; fi ) +$( if [ ! -z ${VIASH_PAR_VB_PRIOR+x} ]; then echo "${VIASH_PAR_VB_PRIOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_vb_prior='&'#" ; else echo "# par_vb_prior="; fi ) +$( if [ ! -z ${VIASH_PAR_WRITE_ORPHAN_LINKS+x} ]; then echo "${VIASH_PAR_WRITE_ORPHAN_LINKS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_write_orphan_links='&'#" ; else echo "# par_write_orphan_links="; fi ) +$( if [ ! -z ${VIASH_PAR_WRITE_UNMAPPED_NAMES+x} ]; then echo "${VIASH_PAR_WRITE_UNMAPPED_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_write_unmapped_names='&'#" ; else echo "# par_write_unmapped_names="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_ERROR_MODEL+x} ]; then echo "${VIASH_PAR_NO_ERROR_MODEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_error_model='&'#" ; else echo "# par_no_error_model="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_ERROR_BINS+x} ]; then echo "${VIASH_PAR_NUM_ERROR_BINS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_error_bins='&'#" ; else echo "# par_num_error_bins="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_OUT+x} ]; then echo "${VIASH_PAR_SAMPLE_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_out='&'#" ; else echo "# par_sample_out="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_UNALIGNED+x} ]; then echo "${VIASH_PAR_SAMPLE_UNALIGNED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_unaligned='&'#" ; else echo "# par_sample_unaligned="; fi ) +$( if [ ! -z ${VIASH_PAR_GENCODE+x} ]; then echo "${VIASH_PAR_GENCODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gencode='&'#" ; else echo "# par_gencode="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPPING_CACHE_MEMORY_LIMIT+x} ]; then echo "${VIASH_PAR_MAPPING_CACHE_MEMORY_LIMIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapping_cache_memory_limit='&'#" ; else echo "# par_mapping_cache_memory_limit="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +unset_if_false=( + par_discard_orphans + par_ont + par_seq_bias + par_gc_bias + par_pos_bias + par_meta + par_discard_orphans_quasi + par_disable_chaining_heuristic + par_allow_dovetail + par_recover_orphans + par_mimicBT2 + par_mimic_strictBT2 + par_softclip + par_softclip_overhangs + par_full_length_alignment + par_hard_filter + par_write_mappings + par_write_qualities + par_alternative_init_mode + par_skip_quant + par_dump_eq + par_dump_eq_weights + par_reduce_GC_memory + par_init_uniform + par_no_length_correction + par_no_effective_length_correction + par_no_single_frag_prob + par_no_frag_length_dist + par_no_bias_length_threshold + par_useEM + par_useVBOpt + par_no_Gamma_draw + par_bootstrap_reproject + par_quiet + par_per_transcript_prior + par_per_nucleotide_prior + par_write_orphan_links + par_write_unmapped_names + par_no_error_model + par_sample_out + par_sample_unaligned + par_gencode +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +IFS=";" read -ra unmated_reads <<< \\$par_unmated_reads +IFS=";" read -ra mates1 <<< \\$par_mates1 +IFS=";" read -ra mates2 <<< \\$par_mates2 +IFS=";" read -ra alignment <<< \\$par_alignments + +salmon quant \\\\ + \\${par_lib_type:+-l "\\${par_lib_type}"} \\\\ + \\${par_index:+-i "\\${par_index}"} \\\\ + \\${par_unmated_reads:+-r \\${unmated_reads[*]}} \\\\ + \\${par_mates1:+-1 \\${mates1[*]}} \\\\ + \\${par_mates2:+-2 \\${mates2[*]}} \\\\ + \\${par_alignments:+-a \\${alignment[*]}} \\\\ + \\${par_discard_orphans:+--discardOrphans} \\\\ + \\${par_eqclasses:+-e "\\${par_eqclasses}"} \\\\ + \\${par_targets:+-t "\\${par_targets}"} \\\\ + \\${par_ont:+--ont} \\\\ + \\${par_output:+-o "\\${par_output}"} \\\\ + \\${par_seq_bias:+--seqBias} \\\\ + \\${par_gc_bias:+--gcBias} \\\\ + \\${par_pos_bias:+--posBias} \\\\ + \\${meta_cpus:+-p "\\${meta_cpus}"} \\\\ + \\${par_incompat_prior:+--incompatPrior "\\${par_incompat_prior}"} \\\\ + \\${par_gene_map:+-g "\\${par_gene_map}"} \\\\ + \\${par_aux_target_file:+--auxTargetFile "\\${par_aux_target_file}"} \\\\ + \\${par_meta:+--meta} \\\\ + \\${par_score_exp:+--scoreExp "\\${par_score_exp}"} \\\\ + \\${par_discard_orphans_quasi:+--discardOrphansQuasi} \\\\ + \\${par_consensus_slack:+--consensusSlack "\\${par_consensus_slack}"} \\\\ + \\${par_pre_merge_chain_sub_thresh:+--preMergeChainSubThresh "\\${par_pre_merge_chain_sub_thresh}"} \\\\ + \\${par_post_merge_chain_sub_thresh:+--postMergeChainSubThresh "\\${par_post_merge_chain_sub_thresh}"} \\\\ + \\${par_orphan_chain_sub_thresh:+--orphanChainSubThresh "\\${par_orphan_chain_sub_thresh}"} \\\\ + \\${par_min_score_fraction:+--minScoreFraction "\\${par_min_score_fraction}"} \\\\ + \\${par_mismatch_seed_skip:+--mismatchSeedSkip "\\${par_mismatch_seed_skip}"} \\\\ + \\${par_disable_chaining_heuristic:+--disableChainingHeuristic} \\\\ + \\${par_decoy_threshold:+--decoyThreshold "\\${par_decoy_threshold}"} \\\\ + \\${par_ma:+--ma "\\${par_ma}"} \\\\ + \\${par_mp:+--mp "\\${par_mp}"} \\\\ + \\${par_go:+--go "\\${par_go}"} \\\\ + \\${par_ge:+--ge "\\${par_ge}"} \\\\ + \\${par_bandwidth:+--bandwidth "\\${par_bandwidth}"} \\\\ + \\${par_allow_dovetail:+--allowDovetail} \\\\ + \\${par_recover_orphans:+--recoverOrphans} \\\\ + \\${par_mimicBT2:+--mimicBT2} \\\\ + \\${par_mimic_strictBT2:+--mimicStrictBT2} \\\\ + \\${par_softclip:+--softclip} \\\\ + \\${par_softclip_overhangs:+--softclipOverhangs} \\\\ + \\${par_full_length_alignment:+--fullLengthAlignment} \\\\ + \\${par_hard_filter:+--hardFilter} \\\\ + \\${par_min_aln_prob:+--minAlnProb "\\${par_min_aln_prob}"} \\\\ + \\${par_write_mappings:+--write_mappings="\\${par_mappings_sam}"} \\\\ + \\${par_write_qualities:+--writeQualities} \\\\ + \\${par_hit_filter_policy:+--hitFilterPolicy "\\${par_hit_filter_policy}"} \\\\ + \\${par_alternative_init_mode:+--alternativeInitMode} \\\\ + \\${par_aux_dir:+--auxDir "\\${par_aux_dir}"} \\\\ + \\${par_skip_quant:+--skipQuant} \\\\ + \\${par_dump_eq:+--dumpEq} \\\\ + \\${par_dump_eq_weights:+-d "\\${par_dump_eq_weights}"} \\\\ + \\${par_min_assigned_frags:+--minAssignedFrags "\\${par_min_assigned_frags}"} \\\\ + \\${par_reduce_GC_memory:+--reduceGCMemory} \\\\ + \\${par_bias_speed_samp:+--biasSpeedSamp "\\${par_bias_speed_samp}"} \\\\ + \\${par_fld_max:+--fldMax "\\${par_fld_max}"} \\\\ + \\${par_fld_mean:+--fldMean "\\${par_fld_mean}"} \\\\ + \\${par_fld_SD:+--fldSD "\\${par_fld_SD}"} \\\\ + \\${par_forgetting_factor:+-f "\\${par_forgetting_factor}"} \\\\ + \\${par_init_uniform:+--initUniform} \\\\ + \\${par_max_occs_per_hit:+--maxOccsPerHit "\\${par_max_occs_per_hit}"} \\\\ + \\${par_max_read_occ:+-w "\\${par_max_read_occ}"} \\\\ + \\${par_no_length_correction:+--noLengthCorrection} \\\\ + \\${par_no_effective_length_correction:+--noEffectiveLengthCorrection} \\\\ + \\${par_no_single_frag_prob:+--noSingleFragProb} \\\\ + \\${par_no_frag_length_dist:+--noFragLengthDist} \\\\ + \\${par_no_bias_length_threshold:+--noBiasLengthThreshold} \\\\ + \\${par_num_bias_samples:+--numBiasSamples "\\${par_num_bias_samples}"} \\\\ + \\${par_num_aux_model_samples:+--numAuxModelSamples "\\${par_num_aux_model_samples}"} \\\\ + \\${par_num_pre_aux_model_samples:+--numPreAuxModelSamples "\\${par_num_pre_aux_model_samples}"} \\\\ + \\${par_useEM:+--useEM} \\\\ + \\${par_useVBOpt:+--useVBOpt} \\\\ + \\${par_range_factorization_bins:+--rangeFactorizationBins "\\${par_range_factorization_bins}"} \\\\ + \\${par_num_Gibbs_samples:+--numGibbsSamples "\\${par_num_Gibbs_samples}"} \\\\ + \\${par_no_Gamma_draw:+--noGammaDraw} \\\\ + \\${par_num_bootstraps:+--numBootstraps "\\${par_num_bootstraps}"} \\\\ + \\${par_bootstrap_reproject:+--bootstrapReproject} \\\\ + \\${par_thinning_factor:+--thinningFactor "\\${par_thinning_factor}"} \\\\ + \\${par_quiet:+--quiet} \\\\ + \\${par_per_transcript_prior:+--perTranscriptPrior} \\\\ + \\${par_per_nucleotide_prior:+--perNucleotidePrior} \\\\ + \\${par_sig_digits:+--sigDigits "\\${par_sig_digits}"} \\\\ + \\${par_vb_prior:+--vbPrior "\\${par_vb_prior}"} \\\\ + \\${par_write_orphan_links:+--writeOrphanLinks} \\\\ + \\${par_write_unmapped_names:+--writeUnmappedNames} \\\\ + \\${par_no_error_model:+--noErrorModel} \\\\ + \\${par_num_error_bins:+--numErrorBins "\\${par_num_error_bins}"} \\\\ + \\${par_sample_out:+--sampleOut} \\\\ + \\${par_sample_unaligned:+--sampleUnaligned} \\\\ + \\${par_gencode:+--gencode} \\\\ + \\${par_mapping_cache_memory_limit:+--mappingCacheMemoryLimit "\\${par_mapping_cache_memory_limit}"} + +if [ -f "\\$par_output/quant.sf" ]; then + mv \\$par_output/quant.sf \\$par_quant_results +else + echo "Quantification file not generated!" +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/salmon/salmon_quant", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow.config new file mode 100644 index 0000000..1b9430a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'salmon/salmon_quant' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It can either make use of pre-computed alignments (in the form of a SAM/BAM file) to the transcripts rather than the raw reads, or can be run in the mapping-based mode. \n' + author = 'Sai Nirmayi Yasa' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow_schema.json new file mode 100644 index 0000000..2b1c3f3 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/nextflow_schema.json @@ -0,0 +1,1119 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "salmon_quant", +"description": "Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It can either make use of pre-computed alignments (in the form of a SAM/BAM file) to the transcripts rather than the raw reads, or can be run in the mapping-based mode. \n", +"type": "object", +"definitions": { + + + + "common input options" : { + "title": "Common input options", + "type": "object", + "description": "No description", + "properties": { + + + "lib_type": { + "type": + "string", + "description": "Type: `string`, default: `A`, choices: ``A`, `U`, `SF`, `SR`, `IU`, `IS`, `ISF`, `ISR`, `OU`, `OS`, `OSF`, `OSR`, `MU`, `MS`, `MSF`, `MSR``. Format string describing the library", + "help_text": "Type: `string`, default: `A`, choices: ``A`, `U`, `SF`, `SR`, `IU`, `IS`, `ISF`, `ISR`, `OU`, `OS`, `OSF`, `OSR`, `MU`, `MS`, `MSF`, `MSR``. Format string describing the library.\nThe library type string consists of three parts: \n1. Relative orientation of the reads: This part is only provided if the library is paired-end, The possible options are\n I = inward\n O = outward\n M = matching\n2. Strandedness of the library: This part specifies whether the protocol is stranded or unstranded. The options are:\n S = stranded\n U = unstranded\n3. Directionality of the reads: If the library is stranded, the final part of the library string is used to specify the strand from which the read originates. The possible values are\n F = read 1 (or single-end read) comes from the forward strand\n R = read 1 (or single-end read) comes from the reverse strand\n", + "enum": ["A", "U", "SF", "SR", "IU", "IS", "ISF", "ISR", "OU", "OS", "OSF", "OSR", "MU", "MS", "MSF", "MSR"] + + , + "default":"A" + } + + +} +}, + + + "mapping input options" : { + "title": "Mapping input options", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "string", + "description": "Type: `file`, example: `transcriptome_index`. Salmon index", + "help_text": "Type: `file`, example: `transcriptome_index`. Salmon index.\n" + + } + + + , + "unmated_reads": { + "type": + "string", + "description": "Type: List of `file`, example: `sample.fq.gz`, multiple_sep: `\";\"`. List of files containing unmated reads of (e", + "help_text": "Type: List of `file`, example: `sample.fq.gz`, multiple_sep: `\";\"`. List of files containing unmated reads of (e.g. single-end reads).\n" + + } + + + , + "mates1": { + "type": + "string", + "description": "Type: List of `file`, example: `sample_1.fq.gz`, multiple_sep: `\";\"`. File containing the #1 mates", + "help_text": "Type: List of `file`, example: `sample_1.fq.gz`, multiple_sep: `\";\"`. File containing the #1 mates.\n" + + } + + + , + "mates2": { + "type": + "string", + "description": "Type: List of `file`, example: `sample_2.fq.gz`, multiple_sep: `\";\"`. File containing the #2 mates", + "help_text": "Type: List of `file`, example: `sample_2.fq.gz`, multiple_sep: `\";\"`. File containing the #2 mates.\n" + + } + + +} +}, + + + "alignment input options" : { + "title": "Alignment input options", + "type": "object", + "description": "No description", + "properties": { + + + "discard_orphans": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard orphan alignments in the input [for alignment-based mode only]", + "help_text": "Type: `boolean_true`, default: `false`. Discard orphan alignments in the input [for alignment-based mode only]. If this flag is passed, then only paired alignments will be considered toward quantification estimates. The default behavior is to consider orphan alignments if no valid paired mappings exist.\n" + , + "default":false + } + + + , + "alignments": { + "type": + "string", + "description": "Type: List of `file`, example: `sample.fq.gz`, multiple_sep: `\";\"`. Input alignment (BAM) file(s)", + "help_text": "Type: List of `file`, example: `sample.fq.gz`, multiple_sep: `\";\"`. Input alignment (BAM) file(s).\n" + + } + + + , + "eqclasses": { + "type": + "string", + "description": "Type: `file`. input salmon weighted equivalence class file", + "help_text": "Type: `file`. input salmon weighted equivalence class file.\n" + + } + + + , + "targets": { + "type": + "string", + "description": "Type: `file`, example: `transcripts.fasta`. FASTA format file containing target transcripts", + "help_text": "Type: `file`, example: `transcripts.fasta`. FASTA format file containing target transcripts.\n" + + } + + + , + "ont": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use alignment model for Oxford Nanopore long reads\n", + "help_text": "Type: `boolean_true`, default: `false`. Use alignment model for Oxford Nanopore long reads\n" + , + "default":false + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output`, example: `quant_output`. Output quantification directory", + "help_text": "Type: `file`, required, default: `$id.$key.output`, example: `quant_output`. Output quantification directory.\n" + , + "default":"$id.$key.output" + } + + + , + "quant_results": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.quant_results.sf`, example: `quant.sf`. Salmon quantification file", + "help_text": "Type: `file`, default: `$id.$key.quant_results.sf`, example: `quant.sf`. Salmon quantification file.\n" + , + "default":"$id.$key.quant_results.sf" + } + + +} +}, + + + "basic options" : { + "title": "Basic options", + "type": "object", + "description": "No description", + "properties": { + + + "seq_bias": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Perform sequence-specific bias correction", + "help_text": "Type: `boolean_true`, default: `false`. Perform sequence-specific bias correction.\n" + , + "default":false + } + + + , + "gc_bias": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Perform fragment GC bias correction [beta for single-end reads]", + "help_text": "Type: `boolean_true`, default: `false`. Perform fragment GC bias correction [beta for single-end reads].\n" + , + "default":false + } + + + , + "pos_bias": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Perform positional bias correction", + "help_text": "Type: `boolean_true`, default: `false`. Perform positional bias correction.\n" + , + "default":false + } + + + , + "incompat_prior": { + "type": + "number", + "description": "Type: `double`, example: `0.0`. Set the prior probability that an alignment that disagrees with the specified library type (--lib_type) results from the true fragment origin", + "help_text": "Type: `double`, example: `0.0`. Set the prior probability that an alignment that disagrees with the specified library type (--lib_type) results from the true fragment origin. Setting this to 0 specifies that alignments that disagree with the library type should be \"impossible\", while setting it to 1 says that alignments that disagree with the library type are no less likely than those that do.\n" + + } + + + , + "gene_map": { + "type": + "string", + "description": "Type: `file`, example: `gene_map.gtf`. File containing a mapping of transcripts to genes", + "help_text": "Type: `file`, example: `gene_map.gtf`. File containing a mapping of transcripts to genes. If this file is provided salmon will output both quant.sf and quant.genes.sf files, where the latter contains aggregated gene-level abundance estimates. The transcript to gene mapping should be provided as either a GTF file, or a in a simple tab-delimited format where each line contains the name of a transcript and the gene to which it belongs separated by a tab. The extension of the file is used to determine how the file should be parsed. Files ending in \u0027.gtf\u0027, \u0027.gff\u0027 or \u0027.gff3\u0027 are assumed to be in GTF format; files with any other extension are assumed to be in the simple format. In GTF / GFF format, the \"transcript_id\" is assumed to contain the transcript identifier and the \"gene_id\" is assumed to contain the corresponding gene identifier.\n" + + } + + + , + "aux_target_file": { + "type": + "string", + "description": "Type: `file`, example: `auxilary_targets.txt`. A file containing a list of \"auxiliary\" targets", + "help_text": "Type: `file`, example: `auxilary_targets.txt`. A file containing a list of \"auxiliary\" targets. These are valid targets (i.e., not decoys) to which fragments are allowed to map and be assigned, and which will be quantified, but for which auxiliary models like sequence-specific and fragment-GC bias correction should not be applied.\n" + + } + + + , + "meta": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If you\u0027re using Salmon on a metagenomic dataset, consider setting this flag to disable parts of the abundance estimation model that make less sense for metagenomic data", + "help_text": "Type: `boolean_true`, default: `false`. If you\u0027re using Salmon on a metagenomic dataset, consider setting this flag to disable parts of the abundance estimation model that make less sense for metagenomic data.\n" + , + "default":false + } + + + , + "score_exp": { + "type": + "number", + "description": "Type: `double`, example: `1.0`. The factor by which sub-optimal alignment scores are downweighted to produce a probability", + "help_text": "Type: `double`, example: `1.0`. The factor by which sub-optimal alignment scores are downweighted to produce a probability. If the best alignment score for the current read is S, and the score for a particular alignment is w, then the probability will be computed porportional to exp( - scoreExp * (S-w) ).\n" + + } + + +} +}, + + + "options specific to mapping mode" : { + "title": "Options specific to mapping mode", + "type": "object", + "description": "No description", + "properties": { + + + "discard_orphans_quasi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nDiscard orphan mappings in selective-alignment mode", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nDiscard orphan mappings in selective-alignment mode. If this flag is passed then only paired mappings will be considered toward quantification estimates. The default behavior is to consider orphan mappings if no valid paired mappings exist. This flag is independent of the option to write the orphaned mappings to file (--writeOrphanLinks).\n" + , + "default":false + } + + + , + "consensus_slack": { + "type": + "number", + "description": "Type: `double`, example: `0.35`. [selective-alignment mode only] \nThe amount of slack allowed in the selective-alignment filtering mechanism", + "help_text": "Type: `double`, example: `0.35`. [selective-alignment mode only] \nThe amount of slack allowed in the selective-alignment filtering mechanism. If this is set to a fraction, X, greater than 0 (and in [0,1)), then uniMEM chains with scores below (100 * X)% of the best chain score for a read, and read pairs with a sum of chain scores below (100 * X)% of the best chain score for a read pair will be discounted as a mapping candidates. The default value of this option is 0.35.\n" + + } + + + , + "pre_merge_chain_sub_thresh": { + "type": + "number", + "description": "Type: `double`, example: `0.75`. [selective-alignment mode only] \nThe threshold of sub-optimal chains, compared to the best chain on a given target, that will be retained and passed to the next phase of mapping", + "help_text": "Type: `double`, example: `0.75`. [selective-alignment mode only] \nThe threshold of sub-optimal chains, compared to the best chain on a given target, that will be retained and passed to the next phase of mapping. Specifically, if the best chain for a read (or read-end in paired-end mode) to target t has score X_t, then all chains for this read with score \u003e= X_t * preMergeChainSubThresh will be retained and passed to subsequent mapping phases. This value must be in the range [0, 1].\n" + + } + + + , + "post_merge_chain_sub_thresh": { + "type": + "number", + "description": "Type: `double`, example: `0.9`. [selective-alignment mode only] \nThe threshold of sub-optimal chains, compared to the best chain on a given target, that will be retained and passed to the next phase of mapping", + "help_text": "Type: `double`, example: `0.9`. [selective-alignment mode only] \nThe threshold of sub-optimal chains, compared to the best chain on a given target, that will be retained and passed to the next phase of mapping. This is different than post_merge_chain_sub_thresh, because this is applied to pairs of chains (from the ends of paired-end reads) after merging (i.e. after checking concordancy constraints etc.). Specifically, if the best chain pair to target t has score X_t, then all chain pairs for this read pair with score \u003e= X_t * post_merge_chain_sub_thresh will be retained and passed to subsequent mapping phases. This value must be in the range [0, 1]. Note: This option is only meaningful for paired-end libraries, and is ignored for single-end libraries.\n" + + } + + + , + "orphan_chain_sub_thresh": { + "type": + "number", + "description": "Type: `double`, example: `0.95`. [selective-alignment mode only]\nThis threshold sets a global sub-optimality threshold for chains corresponding to orphan mappings", + "help_text": "Type: `double`, example: `0.95`. [selective-alignment mode only]\nThis threshold sets a global sub-optimality threshold for chains corresponding to orphan mappings. That is, if the merging procedure results in no concordant mappings then only orphan mappings with a chain score \u003e= orphan_chain_sub_thresh * bestChainScore will be retained and passed to subsequent mapping phases. This value must be in the range [0, 1]. Note: This option is only meaningful for paired-end libraries, and is ignored for single-end libraries.\n" + + } + + + , + "min_score_fraction": { + "type": + "number", + "description": "Type: `double`, example: `0.65`. [selective-alignment mode only]\nThe fraction of the optimal possible alignment score that a mapping must achieve in order to be considered \"valid\" --- should be in (0,1]", + "help_text": "Type: `double`, example: `0.65`. [selective-alignment mode only]\nThe fraction of the optimal possible alignment score that a mapping must achieve in order to be considered \"valid\" --- should be in (0,1]. Default 0.65\n" + + } + + + , + "mismatch_seed_skip": { + "type": + "integer", + "description": "Type: `integer`, example: `3`. [selective-alignment mode only]\nAfter a k-mer hit is extended to a uni-MEM, the uni-MEM extension can terminate for one of 3 reasons; the end of the read, the end of the unitig, or a mismatch", + "help_text": "Type: `integer`, example: `3`. [selective-alignment mode only]\nAfter a k-mer hit is extended to a uni-MEM, the uni-MEM extension can terminate for one of 3 reasons; the end of the read, the end of the unitig, or a mismatch. If the extension ends because of a mismatch, this is likely the result of a sequencing error. To avoid looking up many k-mers that will likely fail to be located in the index, the search procedure skips by a factor of mismatch_seed_skip until it either (1) finds another match or (2) is k-bases past the mismatch position. This value controls that skip length. A smaller value can increase sensitivity, while a larger value can speed up seeding.\n" + + } + + + , + "disable_chaining_heuristic": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nBy default, the heuristic of (Li 2018) is implemented, which terminates the chaining DP once a given number of valid backpointers are found", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nBy default, the heuristic of (Li 2018) is implemented, which terminates the chaining DP once a given number of valid backpointers are found. This speeds up the seed (MEM) chaining step, but may result in sub-optimal chains in complex situations (e.g. sequences with many repeats and overlapping repeats). Passing this flag will disable the chaining heuristic, and perform the full chaining dynamic program, guaranteeing the optimal chain is found in this step.\n" + , + "default":false + } + + + , + "decoy_threshold": { + "type": + "number", + "description": "Type: `double`, example: `1.0`. [selective-alignment mode only]\nFor an alignemnt to an annotated transcript to be considered invalid, it must have an alignment score \u003c (decoy_threshold * bestDecoyScore)", + "help_text": "Type: `double`, example: `1.0`. [selective-alignment mode only]\nFor an alignemnt to an annotated transcript to be considered invalid, it must have an alignment score \u003c (decoy_threshold * bestDecoyScore). A value of 1.0 means that any alignment strictly worse than the best decoy alignment will be discarded. A smaller value will allow reads to be allocated to transcripts even if they strictly align better to the decoy sequence.\n" + + } + + + , + "ma": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. [selective-alignment mode only]\nThe value given to a match between read and reference nucleotides in an alignment", + "help_text": "Type: `integer`, example: `2`. [selective-alignment mode only]\nThe value given to a match between read and reference nucleotides in an alignment.\n" + + } + + + , + "mp": { + "type": + "integer", + "description": "Type: `integer`, example: `-4`. [selective-alignment mode only]\nThe value given to a mis-match between read and reference nucleotides in an alignment", + "help_text": "Type: `integer`, example: `-4`. [selective-alignment mode only]\nThe value given to a mis-match between read and reference nucleotides in an alignment.\n" + + } + + + , + "go": { + "type": + "integer", + "description": "Type: `integer`, example: `6`. [selective-alignment mode only]\nThe value given to a gap opening in an alignment", + "help_text": "Type: `integer`, example: `6`. [selective-alignment mode only]\nThe value given to a gap opening in an alignment.\n" + + } + + + , + "ge": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. [selective-alignment mode only]\nThe value given to a gap extension in an alignment", + "help_text": "Type: `integer`, example: `2`. [selective-alignment mode only]\nThe value given to a gap extension in an alignment.\n" + + } + + + , + "bandwidth": { + "type": + "integer", + "description": "Type: `integer`, example: `15`. [selective-alignment mode only]\nThe value used for the bandwidth passed to ksw2", + "help_text": "Type: `integer`, example: `15`. [selective-alignment mode only]\nThe value used for the bandwidth passed to ksw2. A smaller bandwidth can make the alignment verification run more quickly, but could possibly miss valid alignments.\n" + + } + + + , + "allow_dovetail": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAllow dovetailing mappings", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAllow dovetailing mappings.\n" + , + "default":false + } + + + , + "recover_orphans": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAttempt to recover the mates of orphaned reads", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAttempt to recover the mates of orphaned reads. This uses edlib for orphan recovery, and so introduces some computational overhead, but it can improve sensitivity.\n" + , + "default":false + } + + + , + "mimicBT2": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nSet flags to mimic parameters similar to Bowtie2 with --no-discordant and --no-mixed flags", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nSet flags to mimic parameters similar to Bowtie2 with --no-discordant and --no-mixed flags. This increases disallows dovetailing reads, and discards orphans. Note, this does not impose the very strict parameters assumed by RSEM+Bowtie2, like gapless alignments. For that behavior, use the --mimic_strictBT2 flag below.\n" + , + "default":false + } + + + , + "mimic_strictBT2": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nSet flags to mimic the very strict parameters used by RSEM+Bowtie2", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nSet flags to mimic the very strict parameters used by RSEM+Bowtie2. This increases --min_score_fraction to 0.8, disallows dovetailing reads, discards orphans, and disallows gaps in alignments.\n" + , + "default":false + } + + + , + "softclip": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAllos soft-clipping of reads during selective-alignment", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAllos soft-clipping of reads during selective-alignment. If this option is provided, then regions at the beginning or end of the read can be withheld from alignment without any effect on the resulting score (i.e. neither adding nor removing from the score). This will drastically reduce the penalty if there are mismatches at the beginning or end of the read due to e.g. low-quality bases or adapters. NOTE: Even with soft-clipping enabled, the read must still achieve a score of at least min_score_fraction * maximum achievable score, where the maximum achievable score is computed based on the full (un-clipped) read length.\n" + , + "default":false + } + + + , + "softclip_overhangs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAllow soft-clipping of reads that overhang the beginning or ends of the transcript", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nAllow soft-clipping of reads that overhang the beginning or ends of the transcript. In this case, the overhaning section of the read will simply be unaligned, and will not contribute or detract from the alignment score. The default policy is to force an end-to-end alignment of the entire read, so that overhanings will result in some deletion of nucleotides from the read.\n" + , + "default":false + } + + + , + "full_length_alignment": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nPerform selective alignment over the full length of the read, beginning from the (approximate) initial mapping location and using extension alignment", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nPerform selective alignment over the full length of the read, beginning from the (approximate) initial mapping location and using extension alignment. This is in contrast with the default behavior which is to only perform alignment between the MEMs in the optimal chain (and before the first and after the last MEM if applicable). The default strategy forces the MEMs to belong to the alignment, but has the benefit that it can discover indels prior to the first hit shared between the read and reference. Except in very rare circumstances, the default mode should be more accurate.\n" + , + "default":false + } + + + , + "hard_filter": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nInstead of weighting mappings by their alignment score, this flag will discard any mappings with sub-optimal alignment score", + "help_text": "Type: `boolean_true`, default: `false`. [selective-alignment mode only] \nInstead of weighting mappings by their alignment score, this flag will discard any mappings with sub-optimal alignment score. The default option of soft-filtering (i.e. weighting mappings by their alignment score) usually yields slightly more accurate abundance estimates but this flag may be desirable if you want more accurate \u0027naive\u0027 equivalence classes, rather than range factorized equivalence classes.\n" + , + "default":false + } + + + , + "min_aln_prob": { + "type": + "number", + "description": "Type: `double`, example: `1.0E-5`. The minimum number of fragments that must be assigned to the transcriptome for quantification to proceed", + "help_text": "Type: `double`, example: `1.0E-5`. The minimum number of fragments that must be assigned to the transcriptome for quantification to proceed.\n" + + } + + + , + "write_mappings": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If this option is provided, then the selective-alignment results will be written out in SAM-compatible format", + "help_text": "Type: `boolean_true`, default: `false`. If this option is provided, then the selective-alignment results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead.\n" + , + "default":false + } + + + , + "mapping_sam": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.mapping_sam.sam`, example: `mappings.sam`. Path to file that should output the selective-alignment results in SAM-compatible format", + "help_text": "Type: `file`, default: `$id.$key.mapping_sam.sam`, example: `mappings.sam`. Path to file that should output the selective-alignment results in SAM-compatible format. This option must be provided while using --write_mappings" + , + "default":"$id.$key.mapping_sam.sam" + } + + + , + "write_qualities": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This flag only has meaning if mappings are being written (with --write_mappings/-z)", + "help_text": "Type: `boolean_true`, default: `false`. This flag only has meaning if mappings are being written (with --write_mappings/-z). If this flag is provided, then the output SAM file will contain quality strings as well as read sequences. Note that this can greatly increase the size of the output file.\n" + , + "default":false + } + + + , + "hit_filter_policy": { + "type": + "string", + "description": "Type: `string`, example: `AFTER`, choices: ``BEFORE`, `AFTER`, `BOTH`, `NONE``. [selective-alignment mode only]\nDetermines the policy by which hits are filtered in selective alignment", + "help_text": "Type: `string`, example: `AFTER`, choices: ``BEFORE`, `AFTER`, `BOTH`, `NONE``. [selective-alignment mode only]\nDetermines the policy by which hits are filtered in selective alignment. Filtering hits after chaining (the default) is more sensitive, but more computationally intensive, because it performs the chaining dynamic program for all hits. Filtering before chaining is faster, but some true hits may be missed. The options are BEFORE, AFTER, BOTH and NONE.\n", + "enum": ["BEFORE", "AFTER", "BOTH", "NONE"] + + + } + + +} +}, + + + "advance options" : { + "title": "Advance options", + "type": "object", + "description": "No description", + "properties": { + + + "alternative_init_mode": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use an alternative strategy (rather than simple interpolation between) the online and uniform abundance estimates to initialize the EM / VBEM algorithm", + "help_text": "Type: `boolean_true`, default: `false`. Use an alternative strategy (rather than simple interpolation between) the online and uniform abundance estimates to initialize the EM / VBEM algorithm.\n" + , + "default":false + } + + + , + "aux_dir": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.aux_dir`, example: `aux_info`. The sub-directory of the quantification directory where auxiliary information e", + "help_text": "Type: `file`, default: `$id.$key.aux_dir`, example: `aux_info`. The sub-directory of the quantification directory where auxiliary information e.g. bootstraps, bias parameters, etc. will be written.\n" + , + "default":"$id.$key.aux_dir" + } + + + , + "skip_quant": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip performing the actual transcript quantification (including any Gibbs sampling or bootstrapping)", + "help_text": "Type: `boolean_true`, default: `false`. Skip performing the actual transcript quantification (including any Gibbs sampling or bootstrapping).\n" + , + "default":false + } + + + , + "dump_eq": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Dump the simple equivalence class counts that were computed during mapping or alignment", + "help_text": "Type: `boolean_true`, default: `false`. Dump the simple equivalence class counts that were computed during mapping or alignment.\n" + , + "default":false + } + + + , + "dump_eq_weights": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Dump conditional probabilities associated with transcripts when equivalence class information is being dumped to file", + "help_text": "Type: `boolean_true`, default: `false`. Dump conditional probabilities associated with transcripts when equivalence class information is being dumped to file. Note, this will dump the factorization that is actually used by salmon\u0027s offline phase for inference. If you are using range-factorized equivalence classes (the default) then the same transcript set may appear multiple times with different associated conditional probabilities.\n" + , + "default":false + } + + + , + "min_assigned_frags": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. The minimum number of fragments that must be assigned to the transcriptome for quantification to proceed", + "help_text": "Type: `integer`, example: `10`. The minimum number of fragments that must be assigned to the transcriptome for quantification to proceed.\n" + + } + + + , + "reduce_GC_memory": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If this option is selected, a more memory efficient (but slightly slower) representation is used to compute fragment GC content", + "help_text": "Type: `boolean_true`, default: `false`. If this option is selected, a more memory efficient (but slightly slower) representation is used to compute fragment GC content. Enabling this will reduce memory usage, but can also reduce speed. However, the results themselves will remain the same.\n" + , + "default":false + } + + + , + "bias_speed_samp": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. The value at which the fragment length PMF is down-sampled when evaluating sequence-specific \u0026 GC fragment bias", + "help_text": "Type: `integer`, example: `5`. The value at which the fragment length PMF is down-sampled when evaluating sequence-specific \u0026 GC fragment bias. Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results.\n" + + } + + + , + "fld_max": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. The maximum fragment length to consider when building the empirical distribution\n", + "help_text": "Type: `integer`, example: `1000`. The maximum fragment length to consider when building the empirical distribution\n" + + } + + + , + "fld_mean": { + "type": + "integer", + "description": "Type: `integer`, example: `250`. The mean used in the fragment length distribution prior\n", + "help_text": "Type: `integer`, example: `250`. The mean used in the fragment length distribution prior\n" + + } + + + , + "fld_SD": { + "type": + "integer", + "description": "Type: `integer`, example: `25`. The standard deviation used in the fragment length distribution prior\n", + "help_text": "Type: `integer`, example: `25`. The standard deviation used in the fragment length distribution prior\n" + + } + + + , + "forgetting_factor": { + "type": + "number", + "description": "Type: `double`, example: `0.65`. The forgetting factor used in the online learning schedule", + "help_text": "Type: `double`, example: `0.65`. The forgetting factor used in the online learning schedule. A smallervalue results in quicker learning, but higher variance and may be unstable. A larger value results in slower learning but may be more stable. Value should be in the interval (0.5, 1.0].\n" + + } + + + , + "init_uniform": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Initialize the offline inference with uniform parameters, rather than seeding with online parameters", + "help_text": "Type: `boolean_true`, default: `false`. Initialize the offline inference with uniform parameters, rather than seeding with online parameters.\n" + , + "default":false + } + + + , + "max_occs_per_hit": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. When collecting \"hits\" (MEMs), hits having more than max_occs_per_hit occurrences won\u0027t be considered", + "help_text": "Type: `integer`, example: `1000`. When collecting \"hits\" (MEMs), hits having more than max_occs_per_hit occurrences won\u0027t be considered.\n" + + } + + + , + "max_read_occ": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. Reads \"mapping\" to more than this many places won\u0027t be considered", + "help_text": "Type: `integer`, example: `200`. Reads \"mapping\" to more than this many places won\u0027t be considered.\n" + + } + + + , + "no_length_correction": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Entirely disables length correction when estimating the abundance of transcripts", + "help_text": "Type: `boolean_true`, default: `false`. Entirely disables length correction when estimating the abundance of transcripts. This option can be used with protocols where one expects that fragments derive from their underlying targets without regard to that target\u0027s length (e.g. QuantSeq)\n" + , + "default":false + } + + + , + "no_effective_length_correction": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disables effective length correction when computing the probability that a fragment was generated from a transcript", + "help_text": "Type: `boolean_true`, default: `false`. Disables effective length correction when computing the probability that a fragment was generated from a transcript. If this flag is passed in,the fragment length distribution is not taken into account when computing this probability.\n" + , + "default":false + } + + + , + "no_single_frag_prob": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disables the estimation of an associated fragment length probability for single-end reads or for orphaned mappings in paired-end libraries", + "help_text": "Type: `boolean_true`, default: `false`. Disables the estimation of an associated fragment length probability for single-end reads or for orphaned mappings in paired-end libraries. The default behavior is to consider the probability of all possible fragment lengths associated with the retained mapping. Enabling this flag (i.e. turning this default behavior off) will simply not attempt to estimate a fragment length probability in such cases.\n" + , + "default":false + } + + + , + "no_frag_length_dist": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location. Normally, Fragments with unlikely lengths will be assigned a smaller relative probability than those with more likely lengths. When this flag is passed in, the observed fragment length has no effect on that fragment\u0027s a priori probability.\n" + , + "default":false + } + + + , + "no_bias_length_threshold": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths", + "help_text": "Type: `boolean_true`, default: `false`. If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths. This can increase the precision of bias correction, but harm robustness. The default correction applies a threshold.\n" + , + "default":false + } + + + , + "num_bias_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `2000000`. Number of fragment mappings to use when learning the sequence-specific bias model", + "help_text": "Type: `integer`, example: `2000000`. Number of fragment mappings to use when learning the sequence-specific bias model.\n" + + } + + + , + "num_aux_model_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `5000000`. The first \u003cnum_aux_model_samples\u003e are used to train the auxiliary model parameters (e", + "help_text": "Type: `integer`, example: `5000000`. The first \u003cnum_aux_model_samples\u003e are used to train the auxiliary model parameters (e.g. fragment length distribution, bias, etc.). After ther first \u003cnum_aux_model_samples\u003e observations the auxiliary model parameters will be assumed to have converged and will be fixed.\n" + + } + + + , + "num_pre_aux_model_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `5000`. The first \u003cnumPreAuxModelSamples\u003e will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models", + "help_text": "Type: `integer`, example: `5000`. The first \u003cnumPreAuxModelSamples\u003e will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models. The purpose of ignoring the auxiliary models for the first \u003cnum_pre_aux_model_samples\u003e observations is to avoid applying these models before their parameters have been learned sufficiently well.\n" + + } + + + , + "useEM": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the traditional EM algorithm for optimization in the batch passes", + "help_text": "Type: `boolean_true`, default: `false`. Use the traditional EM algorithm for optimization in the batch passes.\n" + , + "default":false + } + + + , + "useVBOpt": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the Variational Bayesian EM [default]\n", + "help_text": "Type: `boolean_true`, default: `false`. Use the Variational Bayesian EM [default]\n" + , + "default":false + } + + + , + "range_factorization_bins": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. Factorizes the likelihood used in quantification by adopting a new notion of equivalence classes based on the conditional probabilities with which fragments are generated from different transcripts", + "help_text": "Type: `integer`, example: `4`. Factorizes the likelihood used in quantification by adopting a new notion of equivalence classes based on the conditional probabilities with which fragments are generated from different transcripts. This is a more fine-grained factorization than the normal rich equivalence classes. The default value (4) corresponds to the default used in Zakeri et al. 2017 (doi: 10.1093/bioinformatics/btx262), and larger values imply a more fine-grained factorization. If range factorization is enabled, a common value to select for this parameter is 4. A value of 0 signifies the use of basic rich equivalence classes.\n" + + } + + + , + "num_Gibbs_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Number of Gibbs sampling rounds to perform", + "help_text": "Type: `integer`, example: `0`. Number of Gibbs sampling rounds to perform.\n" + + } + + + , + "no_Gamma_draw": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This switch will disable drawing transcript fractions from a Gamma distribution during Gibbs sampling", + "help_text": "Type: `boolean_true`, default: `false`. This switch will disable drawing transcript fractions from a Gamma distribution during Gibbs sampling. In this case the sampler does not account for shot-noise, but only assignment ambiguity\n" + , + "default":false + } + + + , + "num_bootstraps": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Number of bootstrap samples to generate", + "help_text": "Type: `integer`, example: `0`. Number of bootstrap samples to generate. Note: This is mutually exclusive with Gibbs sampling.\n" + + } + + + , + "bootstrap_reproject": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This switch will learn the parameter distribution from the bootstrapped counts for each sample, but will reproject those parameters onto the original equivalence class counts", + "help_text": "Type: `boolean_true`, default: `false`. This switch will learn the parameter distribution from the bootstrapped counts for each sample, but will reproject those parameters onto the original equivalence class counts.\n" + , + "default":false + } + + + , + "thinning_factor": { + "type": + "integer", + "description": "Type: `integer`, example: `16`. Number of steps to discard for every sample kept from the Gibbs chain", + "help_text": "Type: `integer`, example: `16`. Number of steps to discard for every sample kept from the Gibbs chain. The larger this number, the less chance that subsequent samples are auto-correlated, but the slower sampling becomes.\n" + + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Be quiet while doing quantification (don\u0027t write informative output to the console unless something goes wrong)", + "help_text": "Type: `boolean_true`, default: `false`. Be quiet while doing quantification (don\u0027t write informative output to the console unless something goes wrong).\n" + , + "default":false + } + + + , + "per_transcript_prior": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. The prior (either the default or the argument provided via --vb_prior) will be interpreted as a transcript-level prior (i", + "help_text": "Type: `boolean_true`, default: `false`. The prior (either the default or the argument provided via --vb_prior) will be interpreted as a transcript-level prior (i.e. each transcript will be given a prior read count of this value)\n" + , + "default":false + } + + + , + "per_nucleotide_prior": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. The prior (either the default or the argument provided via --vb_prior) will be interpreted as a nucleotide-level prior (i", + "help_text": "Type: `boolean_true`, default: `false`. The prior (either the default or the argument provided via --vb_prior) will be interpreted as a nucleotide-level prior (i.e. each nucleotide will be given a prior read count of this value)\n" + , + "default":false + } + + + , + "sig_digits": { + "type": + "integer", + "description": "Type: `integer`, example: `3`. The number of significant digits to write when outputting the EffectiveLength and NumReads columns\n", + "help_text": "Type: `integer`, example: `3`. The number of significant digits to write when outputting the EffectiveLength and NumReads columns\n" + + } + + + , + "vb_prior": { + "type": + "number", + "description": "Type: `double`, example: `0.01`. The prior that will be used in the VBEM algorithm", + "help_text": "Type: `double`, example: `0.01`. The prior that will be used in the VBEM algorithm. This is interpreted as a per-transcript prior, unless the --per_nucleotide_prior flag is also given. If the --per_nucleotide_prior flag is given, this is used as a nucleotide-level prior. If the default is used, it will be divided by 1000 before being used as a nucleotide-level prior, i.e. the default per-nucleotide prior will be 1e-5.\n" + + } + + + , + "write_orphan_links": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Write the transcripts that are linked by orphaned reads", + "help_text": "Type: `boolean_true`, default: `false`. Write the transcripts that are linked by orphaned reads.\n" + , + "default":false + } + + + , + "write_unmapped_names": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Write the names of un-mapped reads to the file unmapped_names", + "help_text": "Type: `boolean_true`, default: `false`. Write the names of un-mapped reads to the file unmapped_names.txt in the auxiliary directory.\n" + , + "default":false + } + + +} +}, + + + "alignment-specific options" : { + "title": "Alignment-specific options", + "type": "object", + "description": "No description", + "properties": { + + + "no_error_model": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Turn off the alignment error model, which takes into account the the observed frequency of different types of mismatches / indels when computing the likelihood of a given alignment", + "help_text": "Type: `boolean_true`, default: `false`. Turn off the alignment error model, which takes into account the the observed frequency of different types of mismatches / indels when computing the likelihood of a given alignment. Turning this off can speed up alignment-based salmon, but can harm quantification accuracy.\n" + , + "default":false + } + + + , + "num_error_bins": { + "type": + "integer", + "description": "Type: `integer`, example: `6`. The number of bins into which to divide each read when learning and applying the error model", + "help_text": "Type: `integer`, example: `6`. The number of bins into which to divide each read when learning and applying the error model. For example, a value of 10 would mean that effectively, a separate error model is leared and applied to each 10th of the read, while a value of 3 would mean that a separate error model is applied to the read beginning (first third), middle (second third) and end (final third).\n" + + } + + + , + "sample_out": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Write a \"postSample", + "help_text": "Type: `boolean_true`, default: `false`. Write a \"postSample.bam\" file in the output directory that will sample the input alignments according to the estimated transcript abundances. If you\u0027re going to perform downstream analysis of the alignments with tools which don\u0027t, themselves, take fragment assignment ambiguity into account, you should use this output.\n" + , + "default":false + } + + + , + "sample_unaligned": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. In addition to sampling the aligned reads, also write the un-aligned reads to \"postSample", + "help_text": "Type: `boolean_true`, default: `false`. In addition to sampling the aligned reads, also write the un-aligned reads to \"postSample.bam\".\n" + , + "default":false + } + + + , + "gencode": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This flag will expect the input transcript fasta to be in GENCODE format, and will split the transcript name at the first \u0027|\u0027 character", + "help_text": "Type: `boolean_true`, default: `false`. This flag will expect the input transcript fasta to be in GENCODE format, and will split the transcript name at the first \u0027|\u0027 character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF.\n" + , + "default":false + } + + + , + "mapping_cache_memory_limit": { + "type": + "integer", + "description": "Type: `integer`, example: `2000000`. If the file contained fewer than this many mapped reads, then just keep the data in memory for subsequent rounds of inference", + "help_text": "Type: `integer`, example: `2000000`. If the file contained fewer than this many mapped reads, then just keep the data in memory for subsequent rounds of inference. Obviously, this value should not be too large if you wish to keep a low memory usage, but setting it large enough to accommodate all of the mapped read can substantially speed up inference on \"small\" files that contain only a few million reads.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/common input options" + }, + + { + "$ref": "#/definitions/mapping input options" + }, + + { + "$ref": "#/definitions/alignment input options" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/basic options" + }, + + { + "$ref": "#/definitions/options specific to mapping mode" + }, + + { + "$ref": "#/definitions/advance options" + }, + + { + "$ref": "#/definitions/alignment-specific options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/.config.vsh.yaml new file mode 100644 index 0000000..c9ba5a1 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/.config.vsh.yaml @@ -0,0 +1,214 @@ +name: "samtools_flagstat" +namespace: "samtools" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--bam" + description: "BAM input files.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai" + description: "BAM index file.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "File containing samtools stats output.\n" + info: null + example: + - "output.flagstat" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Counts the number of alignments in SAM/BAM/CRAM files for each FLAG\ + \ type." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "stats" +- "mapping" +- "counts" +- "bam" +- "sam" +- "cram" +license: "MIT/Expat" +references: + doi: + - "10.1093/bioinformatics/btp352" + - "10.1093/gigascience/giab008" +links: + repository: "https://github.com/samtools/samtools" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools-flagstat.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\ + \ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/samtools/samtools_flagstat/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/samtools/samtools_flagstat" + executable: "target/nextflow/samtools/samtools_flagstat/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf new file mode 100644 index 0000000..55f23ac --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf @@ -0,0 +1,3838 @@ +// samtools_flagstat v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "samtools_flagstat", + "namespace" : "samtools", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--bam", + "description" : "BAM input files.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bai", + "description" : "BAM index file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "File containing samtools stats output.\n", + "example" : [ + "output.flagstat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "stats", + "mapping", + "counts", + "bam", + "sam", + "cram" + ], + "license" : "MIT/Expat", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btp352", + "10.1093/gigascience/giab008" + ] + }, + "links" : { + "repository" : "https://github.com/samtools/samtools", + "homepage" : "https://www.htslib.org/", + "documentation" : "https://www.htslib.org/doc/samtools-flagstat.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\\\nsed 's#Using ##;s# \\\\([0-9\\\\.]*\\\\)$#: \\\\1#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/samtools/samtools_flagstat/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/samtools/samtools_flagstat", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e + +samtools flagstat \\\\ + "\\$par_bam" \\\\ + > "\\$par_output" + +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/samtools/samtools_flagstat", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow.config new file mode 100644 index 0000000..a198272 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'samtools/samtools_flagstat' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type.' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow_schema.json new file mode 100644 index 0000000..77d8de7 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "samtools_flagstat", +"description": "Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "bam": { + "type": + "string", + "description": "Type: `file`. BAM input files", + "help_text": "Type: `file`. BAM input files.\n" + + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`. BAM index file", + "help_text": "Type: `file`. BAM index file.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.flagstat`, example: `output.flagstat`. File containing samtools stats output", + "help_text": "Type: `file`, required, default: `$id.$key.output.flagstat`, example: `output.flagstat`. File containing samtools stats output.\n" + , + "default":"$id.$key.output.flagstat" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/.config.vsh.yaml new file mode 100644 index 0000000..b5ebcb8 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/.config.vsh.yaml @@ -0,0 +1,224 @@ +name: "samtools_idxstats" +namespace: "samtools" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--bam" + description: "BAM input file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai" + description: "BAM index file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fasta" + description: "Reference file the CRAM was created with (optional)." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "File containing samtools stats output in tab-delimited format.\n" + info: null + example: + - "output.idxstats" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Reports alignment summary statistics for a BAM file." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "stats" +- "mapping" +- "counts" +- "chromosome" +- "bam" +- "sam" +- "cram" +license: "MIT/Expat" +references: + doi: + - "10.1093/bioinformatics/btp352" + - "10.1093/gigascience/giab008" +links: + repository: "https://github.com/samtools/samtools" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools-idxstats.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\ + \ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/samtools/samtools_idxstats/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/samtools/samtools_idxstats" + executable: "target/nextflow/samtools/samtools_idxstats/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf new file mode 100644 index 0000000..407d16c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf @@ -0,0 +1,3848 @@ +// samtools_idxstats v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "samtools_idxstats", + "namespace" : "samtools", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--bam", + "description" : "BAM input file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bai", + "description" : "BAM index file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fasta", + "description" : "Reference file the CRAM was created with (optional).", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "File containing samtools stats output in tab-delimited format.\n", + "example" : [ + "output.idxstats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Reports alignment summary statistics for a BAM file.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "stats", + "mapping", + "counts", + "chromosome", + "bam", + "sam", + "cram" + ], + "license" : "MIT/Expat", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btp352", + "10.1093/gigascience/giab008" + ] + }, + "links" : { + "repository" : "https://github.com/samtools/samtools", + "homepage" : "https://www.htslib.org/", + "documentation" : "https://www.htslib.org/doc/samtools-idxstats.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\\\nsed 's#Using ##;s# \\\\([0-9\\\\.]*\\\\)$#: \\\\1#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/samtools/samtools_idxstats/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/samtools/samtools_idxstats", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e + +samtools idxstats "\\$par_bam" > "\\$par_output" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/samtools/samtools_idxstats", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow.config new file mode 100644 index 0000000..3cc2c7a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'samtools/samtools_idxstats' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Reports alignment summary statistics for a BAM file.' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow_schema.json new file mode 100644 index 0000000..3b43ccc --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/nextflow_schema.json @@ -0,0 +1,115 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "samtools_idxstats", +"description": "Reports alignment summary statistics for a BAM file.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "bam": { + "type": + "string", + "description": "Type: `file`. BAM input file", + "help_text": "Type: `file`. BAM input file." + + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`. BAM index file", + "help_text": "Type: `file`. BAM index file." + + } + + + , + "fasta": { + "type": + "string", + "description": "Type: `file`. Reference file the CRAM was created with (optional)", + "help_text": "Type: `file`. Reference file the CRAM was created with (optional)." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.idxstats`, example: `output.idxstats`. File containing samtools stats output in tab-delimited format", + "help_text": "Type: `file`, required, default: `$id.$key.output.idxstats`, example: `output.idxstats`. File containing samtools stats output in tab-delimited format.\n" + , + "default":"$id.$key.output.idxstats" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/.config.vsh.yaml new file mode 100644 index 0000000..3241051 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/.config.vsh.yaml @@ -0,0 +1,230 @@ +name: "samtools_index" +namespace: "samtools" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input file name" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output file name" + info: null + example: + - "out.bam.bai" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "boolean_true" + name: "--bai" + alternatives: + - "-b" + description: "Generate BAM index" + info: null + direction: "input" + - type: "boolean_true" + name: "--csi" + alternatives: + - "-c" + description: "Create a CSI index for BAM files instead of the traditional BAI\ + \ \nindex. This will be required for genomes with larger chromosome \nsizes.\n" + info: null + direction: "input" + - type: "integer" + name: "--min_shift" + alternatives: + - "-m" + description: "Create a CSI index, with a minimum interval size of 2^INT.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Index SAM/BAM/CRAM files." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "index" +- "bam" +- "sam" +- "cram" +license: "MIT/Expat" +references: + doi: + - "10.1093/bioinformatics/btp352" + - "10.1093/gigascience/giab008" +links: + repository: "https://github.com/samtools/samtools" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools-index.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\ + \ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/samtools/samtools_index/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/samtools/samtools_index" + executable: "target/nextflow/samtools/samtools_index/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf new file mode 100644 index 0000000..378b33b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf @@ -0,0 +1,3872 @@ +// samtools_index v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "samtools_index", + "namespace" : "samtools", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input file name", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output file name", + "example" : [ + "out.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--bai", + "alternatives" : [ + "-b" + ], + "description" : "Generate BAM index", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--csi", + "alternatives" : [ + "-c" + ], + "description" : "Create a CSI index for BAM files instead of the traditional BAI \nindex. This will be required for genomes with larger chromosome \nsizes.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_shift", + "alternatives" : [ + "-m" + ], + "description" : "Create a CSI index, with a minimum interval size of 2^INT.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Index SAM/BAM/CRAM files.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "index", + "bam", + "sam", + "cram" + ], + "license" : "MIT/Expat", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btp352", + "10.1093/gigascience/giab008" + ] + }, + "links" : { + "repository" : "https://github.com/samtools/samtools", + "homepage" : "https://www.htslib.org/", + "documentation" : "https://www.htslib.org/doc/samtools-index.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\\\nsed 's#Using ##;s# \\\\([0-9\\\\.]*\\\\)$#: \\\\1#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/samtools/samtools_index/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/samtools/samtools_index", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_CSI+x} ]; then echo "${VIASH_PAR_CSI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_csi='&'#" ; else echo "# par_csi="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_SHIFT+x} ]; then echo "${VIASH_PAR_MIN_SHIFT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_shift='&'#" ; else echo "# par_min_shift="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e +[[ "\\$par_multiple" == "false" ]] && unset par_multiple +[[ "\\$par_bai" == "false" ]] && unset par_bai +[[ "\\$par_csi" == "false" ]] && unset par_csi +[[ "\\$par_multiple" == "false" ]] && unset par_multiple + +samtools index \\\\ + "\\$par_input" \\\\ + \\${par_csi:+-c} \\\\ + \\${par_bai:+-b} \\\\ + \\${par_min_shift:+-m "par_min_shift"} \\\\ + \\${par_multiple:+-M} \\\\ + -o "\\$par_output" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/samtools/samtools_index", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow.config new file mode 100644 index 0000000..5edfb04 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'samtools/samtools_index' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Index SAM/BAM/CRAM files.' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow_schema.json new file mode 100644 index 0000000..1617235 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/nextflow_schema.json @@ -0,0 +1,141 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "samtools_index", +"description": "Index SAM/BAM/CRAM files.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input file name", + "help_text": "Type: `file`, required. Input file name" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.bai`, example: `out.bam.bai`. Output file name", + "help_text": "Type: `file`, required, default: `$id.$key.output.bai`, example: `out.bam.bai`. Output file name" + , + "default":"$id.$key.output.bai" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "bai": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate BAM index", + "help_text": "Type: `boolean_true`, default: `false`. Generate BAM index" + , + "default":false + } + + + , + "csi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI \nindex", + "help_text": "Type: `boolean_true`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI \nindex. This will be required for genomes with larger chromosome \nsizes.\n" + , + "default":false + } + + + , + "min_shift": { + "type": + "integer", + "description": "Type: `integer`. Create a CSI index, with a minimum interval size of 2^INT", + "help_text": "Type: `integer`. Create a CSI index, with a minimum interval size of 2^INT.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/.config.vsh.yaml new file mode 100644 index 0000000..abcc59c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/.config.vsh.yaml @@ -0,0 +1,373 @@ +name: "samtools_sort" +namespace: "samtools" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "SAM/BAM/CRAM input file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Write final output to file.\n" + info: null + example: + - "out.bam" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--output_fmt" + alternatives: + - "-O" + description: "Specify output format (SAM, BAM, CRAM).\n" + info: null + example: + - "BAM" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--output_fmt_option" + description: "Specify a single output file format option in the form\nof OPTION\ + \ or OPTION=VALUE.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reference" + description: "Reference sequence FASTA FILE.\n" + info: null + example: + - "ref.fa" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--write_index" + description: "Automatically index the output files.\n" + info: null + direction: "input" + - type: "string" + name: "--prefix" + alternatives: + - "-T" + description: "Write temporary files to PREFIX.nnnn.bam.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--no_PG" + description: "Do not add a PG line.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--template_coordinate" + description: "Sort by template-coordinate.\n" + info: null + direction: "input" + - type: "string" + name: "--input_fmt_option" + description: "Specify a single input file format option in the form\nof OPTION\ + \ or OPTION=VALUE.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "integer" + name: "--compression" + alternatives: + - "-l" + description: "Set compression level, from 0 (uncompressed) to 9 (best).\n" + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--uncompressed" + alternatives: + - "-u" + description: "Output uncompressed data (equivalent to --compression 0).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--minimiser" + alternatives: + - "-M" + description: "Use minimiser for clustering unaligned/unplaced reads.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--not_reverse" + alternatives: + - "-R" + description: "Do not use reverse strand (only compatible with --minimiser)\n" + info: null + direction: "input" + - type: "integer" + name: "--kmer_size" + alternatives: + - "-K" + description: "Kmer size to use for minimiser.\n" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--order" + alternatives: + - "-I" + description: "Order minimisers by their position in FILE FASTA.\n" + info: null + example: + - "ref.fa" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--window" + alternatives: + - "-w" + description: "Window size for minimiser INDEXING VIA --order REF.FA.\n" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--homopolymers" + alternatives: + - "-H" + description: "Squash homopolymers when computing minimiser.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--natural_sort" + alternatives: + - "-n" + description: "Sort by read name (natural): cannot be used with samtools index.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--ascii_sort" + alternatives: + - "-N" + description: "Sort by read name (ASCII): cannot be used with samtools index.\n" + info: null + direction: "input" + - type: "string" + name: "--tag" + alternatives: + - "-t" + description: "Sort by value of TAG. Uses position as secondary index \n(or read\ + \ name if --natural_sort is set).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Sort SAM/BAM/CRAM file." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "sort" +- "bam" +- "sam" +- "cram" +license: "MIT/Expat" +references: + doi: + - "10.1093/bioinformatics/btp352" + - "10.1093/gigascience/giab008" +links: + repository: "https://github.com/samtools/samtools" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools-sort.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\ + \ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/samtools/samtools_sort/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/samtools/samtools_sort" + executable: "target/nextflow/samtools/samtools_sort/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf new file mode 100644 index 0000000..631f2de --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf @@ -0,0 +1,4092 @@ +// samtools_sort v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "samtools_sort", + "namespace" : "samtools", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "SAM/BAM/CRAM input file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Write final output to file.\n", + "example" : [ + "out.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--output_fmt", + "alternatives" : [ + "-O" + ], + "description" : "Specify output format (SAM, BAM, CRAM).\n", + "example" : [ + "BAM" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--output_fmt_option", + "description" : "Specify a single output file format option in the form\nof OPTION or OPTION=VALUE.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reference", + "description" : "Reference sequence FASTA FILE.\n", + "example" : [ + "ref.fa" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--write_index", + "description" : "Automatically index the output files.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--prefix", + "alternatives" : [ + "-T" + ], + "description" : "Write temporary files to PREFIX.nnnn.bam.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--no_PG", + "description" : "Do not add a PG line.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--template_coordinate", + "description" : "Sort by template-coordinate.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--input_fmt_option", + "description" : "Specify a single input file format option in the form\nof OPTION or OPTION=VALUE.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--compression", + "alternatives" : [ + "-l" + ], + "description" : "Set compression level, from 0 (uncompressed) to 9 (best).\n", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--uncompressed", + "alternatives" : [ + "-u" + ], + "description" : "Output uncompressed data (equivalent to --compression 0).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--minimiser", + "alternatives" : [ + "-M" + ], + "description" : "Use minimiser for clustering unaligned/unplaced reads.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--not_reverse", + "alternatives" : [ + "-R" + ], + "description" : "Do not use reverse strand (only compatible with --minimiser)\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--kmer_size", + "alternatives" : [ + "-K" + ], + "description" : "Kmer size to use for minimiser.\n", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--order", + "alternatives" : [ + "-I" + ], + "description" : "Order minimisers by their position in FILE FASTA.\n", + "example" : [ + "ref.fa" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--window", + "alternatives" : [ + "-w" + ], + "description" : "Window size for minimiser INDEXING VIA --order REF.FA.\n", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--homopolymers", + "alternatives" : [ + "-H" + ], + "description" : "Squash homopolymers when computing minimiser.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--natural_sort", + "alternatives" : [ + "-n" + ], + "description" : "Sort by read name (natural): cannot be used with samtools index.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--ascii_sort", + "alternatives" : [ + "-N" + ], + "description" : "Sort by read name (ASCII): cannot be used with samtools index.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--tag", + "alternatives" : [ + "-t" + ], + "description" : "Sort by value of TAG. Uses position as secondary index \n(or read name if --natural_sort is set).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Sort SAM/BAM/CRAM file.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "sort", + "bam", + "sam", + "cram" + ], + "license" : "MIT/Expat", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btp352", + "10.1093/gigascience/giab008" + ] + }, + "links" : { + "repository" : "https://github.com/samtools/samtools", + "homepage" : "https://www.htslib.org/", + "documentation" : "https://www.htslib.org/doc/samtools-sort.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\\\nsed 's#Using ##;s# \\\\([0-9\\\\.]*\\\\)$#: \\\\1#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/samtools/samtools_sort/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/samtools/samtools_sort", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FMT+x} ]; then echo "${VIASH_PAR_OUTPUT_FMT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_fmt='&'#" ; else echo "# par_output_fmt="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FMT_OPTION+x} ]; then echo "${VIASH_PAR_OUTPUT_FMT_OPTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_fmt_option='&'#" ; else echo "# par_output_fmt_option="; fi ) +$( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "${VIASH_PAR_REFERENCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reference='&'#" ; else echo "# par_reference="; fi ) +$( if [ ! -z ${VIASH_PAR_WRITE_INDEX+x} ]; then echo "${VIASH_PAR_WRITE_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_write_index='&'#" ; else echo "# par_write_index="; fi ) +$( if [ ! -z ${VIASH_PAR_PREFIX+x} ]; then echo "${VIASH_PAR_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_prefix='&'#" ; else echo "# par_prefix="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_PG+x} ]; then echo "${VIASH_PAR_NO_PG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_PG='&'#" ; else echo "# par_no_PG="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMPLATE_COORDINATE+x} ]; then echo "${VIASH_PAR_TEMPLATE_COORDINATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_template_coordinate='&'#" ; else echo "# par_template_coordinate="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FMT_OPTION+x} ]; then echo "${VIASH_PAR_INPUT_FMT_OPTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_fmt_option='&'#" ; else echo "# par_input_fmt_option="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "${VIASH_PAR_COMPRESSION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_compression='&'#" ; else echo "# par_compression="; fi ) +$( if [ ! -z ${VIASH_PAR_UNCOMPRESSED+x} ]; then echo "${VIASH_PAR_UNCOMPRESSED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_uncompressed='&'#" ; else echo "# par_uncompressed="; fi ) +$( if [ ! -z ${VIASH_PAR_MINIMISER+x} ]; then echo "${VIASH_PAR_MINIMISER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minimiser='&'#" ; else echo "# par_minimiser="; fi ) +$( if [ ! -z ${VIASH_PAR_NOT_REVERSE+x} ]; then echo "${VIASH_PAR_NOT_REVERSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_not_reverse='&'#" ; else echo "# par_not_reverse="; fi ) +$( if [ ! -z ${VIASH_PAR_KMER_SIZE+x} ]; then echo "${VIASH_PAR_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmer_size='&'#" ; else echo "# par_kmer_size="; fi ) +$( if [ ! -z ${VIASH_PAR_ORDER+x} ]; then echo "${VIASH_PAR_ORDER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_order='&'#" ; else echo "# par_order="; fi ) +$( if [ ! -z ${VIASH_PAR_WINDOW+x} ]; then echo "${VIASH_PAR_WINDOW}" | sed "s#'#'\\"'\\"'#g;s#.*#par_window='&'#" ; else echo "# par_window="; fi ) +$( if [ ! -z ${VIASH_PAR_HOMOPOLYMERS+x} ]; then echo "${VIASH_PAR_HOMOPOLYMERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_homopolymers='&'#" ; else echo "# par_homopolymers="; fi ) +$( if [ ! -z ${VIASH_PAR_NATURAL_SORT+x} ]; then echo "${VIASH_PAR_NATURAL_SORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_natural_sort='&'#" ; else echo "# par_natural_sort="; fi ) +$( if [ ! -z ${VIASH_PAR_ASCII_SORT+x} ]; then echo "${VIASH_PAR_ASCII_SORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ascii_sort='&'#" ; else echo "# par_ascii_sort="; fi ) +$( if [ ! -z ${VIASH_PAR_TAG+x} ]; then echo "${VIASH_PAR_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tag='&'#" ; else echo "# par_tag="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e + +unset_if_false=( + par_uncompressed + par_minimiser + par_not_reverse + par_homopolymers + par_natural_sort + par_ascii_sort + par_template_coordinate + par_write_index + par_no_PG +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + + +samtools sort \\\\ + \\${par_compression:+-l "\\$par_compression"} \\\\ + \\${par_uncompressed:+-u} \\\\ + \\${par_minimiser:+-M} \\\\ + \\${par_not_reverse:+-R} \\\\ + \\${par_kmer_size:+-K "\\$par_kmer_size"} \\\\ + \\${par_order:+-I "\\$par_order"} \\\\ + \\${par_window:+-w "\\$par_window"} \\\\ + \\${par_homopolymers:+-H} \\\\ + \\${par_natural_sort:+-n} \\\\ + \\${par_ascii_sort:+-N} \\\\ + \\${par_tag:+-t "\\$par_tag"} \\\\ + \\${par_input_fmt_option:+--input-fmt-option "\\$par_input_fmt_option"} \\\\ + \\${par_template_coordinate:+--template-coordinate} \\\\ + \\${par_write_index:+--write-index} \\\\ + \\${par_prefix:+-T "\\$par_prefix"} \\\\ + \\${par_no_PG:+--no-PG} \\\\ + \\${par_output_fmt:+-O "\\$par_output_fmt"} \\\\ + \\${par_output_fmt_option:+--output-fmt-option "\\$par_output_fmt_option"} \\\\ + \\${par_reference:+--reference "\\$par_reference"} \\\\ + -o "\\$par_output" \\\\ + "\\$par_input" + +# save text files containing the output of samtools view for later comparison +samtools view "\\$par_output" -o "\\$par_output".txt +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/samtools/samtools_sort", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow.config new file mode 100644 index 0000000..607e766 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'samtools/samtools_sort' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Sort SAM/BAM/CRAM file.' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow_schema.json new file mode 100644 index 0000000..b46fa99 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/nextflow_schema.json @@ -0,0 +1,309 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "samtools_sort", +"description": "Sort SAM/BAM/CRAM file.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. SAM/BAM/CRAM input file", + "help_text": "Type: `file`, required. SAM/BAM/CRAM input file." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.bam`, example: `out.bam`. Write final output to file", + "help_text": "Type: `file`, required, default: `$id.$key.output.bam`, example: `out.bam`. Write final output to file.\n" + , + "default":"$id.$key.output.bam" + } + + + , + "output_fmt": { + "type": + "string", + "description": "Type: `string`, example: `BAM`. Specify output format (SAM, BAM, CRAM)", + "help_text": "Type: `string`, example: `BAM`. Specify output format (SAM, BAM, CRAM).\n" + + } + + + , + "output_fmt_option": { + "type": + "string", + "description": "Type: `string`. Specify a single output file format option in the form\nof OPTION or OPTION=VALUE", + "help_text": "Type: `string`. Specify a single output file format option in the form\nof OPTION or OPTION=VALUE.\n" + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, example: `ref.fa`. Reference sequence FASTA FILE", + "help_text": "Type: `file`, example: `ref.fa`. Reference sequence FASTA FILE.\n" + + } + + + , + "write_index": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Automatically index the output files", + "help_text": "Type: `boolean_true`, default: `false`. Automatically index the output files.\n" + , + "default":false + } + + + , + "prefix": { + "type": + "string", + "description": "Type: `string`. Write temporary files to PREFIX", + "help_text": "Type: `string`. Write temporary files to PREFIX.nnnn.bam.\n" + + } + + + , + "no_PG": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not add a PG line", + "help_text": "Type: `boolean_true`, default: `false`. Do not add a PG line.\n" + , + "default":false + } + + + , + "template_coordinate": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort by template-coordinate", + "help_text": "Type: `boolean_true`, default: `false`. Sort by template-coordinate.\n" + , + "default":false + } + + + , + "input_fmt_option": { + "type": + "string", + "description": "Type: `string`. Specify a single input file format option in the form\nof OPTION or OPTION=VALUE", + "help_text": "Type: `string`. Specify a single input file format option in the form\nof OPTION or OPTION=VALUE.\n" + + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "compression": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. Set compression level, from 0 (uncompressed) to 9 (best)", + "help_text": "Type: `integer`, default: `0`. Set compression level, from 0 (uncompressed) to 9 (best).\n" + , + "default":0 + } + + + , + "uncompressed": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output uncompressed data (equivalent to --compression 0)", + "help_text": "Type: `boolean_true`, default: `false`. Output uncompressed data (equivalent to --compression 0).\n" + , + "default":false + } + + + , + "minimiser": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use minimiser for clustering unaligned/unplaced reads", + "help_text": "Type: `boolean_true`, default: `false`. Use minimiser for clustering unaligned/unplaced reads.\n" + , + "default":false + } + + + , + "not_reverse": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not use reverse strand (only compatible with --minimiser)\n", + "help_text": "Type: `boolean_true`, default: `false`. Do not use reverse strand (only compatible with --minimiser)\n" + , + "default":false + } + + + , + "kmer_size": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. Kmer size to use for minimiser", + "help_text": "Type: `integer`, example: `20`. Kmer size to use for minimiser.\n" + + } + + + , + "order": { + "type": + "string", + "description": "Type: `file`, example: `ref.fa`. Order minimisers by their position in FILE FASTA", + "help_text": "Type: `file`, example: `ref.fa`. Order minimisers by their position in FILE FASTA.\n" + + } + + + , + "window": { + "type": + "integer", + "description": "Type: `integer`, example: `100`. Window size for minimiser INDEXING VIA --order REF", + "help_text": "Type: `integer`, example: `100`. Window size for minimiser INDEXING VIA --order REF.FA.\n" + + } + + + , + "homopolymers": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Squash homopolymers when computing minimiser", + "help_text": "Type: `boolean_true`, default: `false`. Squash homopolymers when computing minimiser.\n" + , + "default":false + } + + + , + "natural_sort": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort by read name (natural): cannot be used with samtools index", + "help_text": "Type: `boolean_true`, default: `false`. Sort by read name (natural): cannot be used with samtools index.\n" + , + "default":false + } + + + , + "ascii_sort": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort by read name (ASCII): cannot be used with samtools index", + "help_text": "Type: `boolean_true`, default: `false`. Sort by read name (ASCII): cannot be used with samtools index.\n" + , + "default":false + } + + + , + "tag": { + "type": + "string", + "description": "Type: `string`. Sort by value of TAG", + "help_text": "Type: `string`. Sort by value of TAG. Uses position as secondary index \n(or read name if --natural_sort is set).\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/.config.vsh.yaml new file mode 100644 index 0000000..4b63a7b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/.config.vsh.yaml @@ -0,0 +1,442 @@ +name: "samtools_stats" +namespace: "samtools" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input file.\n" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai" + description: "Index file.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fasta" + description: "Reference file the CRAM was created with.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--coverage" + alternatives: + - "-c" + description: "Coverage distribution min;max;step. Default: [1, 1000, 1].\n" + info: null + example: + - 1 + - 1000 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--remove_dups" + alternatives: + - "-d" + description: "Exclude from statistics reads marked as duplicates.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--customized_index_file" + alternatives: + - "-X" + description: "Use a customized index file.\n" + info: null + direction: "input" + - type: "string" + name: "--required_flag" + alternatives: + - "-f" + description: "Required flag, 0 for unset. See also `samtools flags`. Default:\ + \ `\"0\"`.\n" + info: null + example: + - "0" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--filtering_flag" + alternatives: + - "-F" + description: "Filtering flag, 0 for unset. See also `samtools flags`. Default:\ + \ `0`.\n" + info: null + example: + - "0" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--GC_depth" + description: "The size of GC-depth bins (decreasing bin size increases memory\ + \ requirement). Default: `20000`.\n" + info: null + example: + - 20000.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--insert_size" + alternatives: + - "-i" + description: "Maximum insert size. Default: `8000`.\n" + info: null + example: + - 8000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--id" + alternatives: + - "-I" + description: "Include only listed read group or sample name.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_length" + alternatives: + - "-l" + description: "Include in the statistics only reads with the given read length.\ + \ Default: `-1`.\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--most_inserts" + alternatives: + - "-m" + description: "Report only the main part of inserts. Default: `0.99`.\n" + info: null + example: + - 0.99 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--split_prefix" + alternatives: + - "-P" + description: "Path or string prefix for filepaths output by --split (default is\ + \ input filename).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--trim_quality" + alternatives: + - "-q" + description: "The BWA trimming parameter. Default: `0`.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--ref_seq" + alternatives: + - "-r" + description: "Reference sequence (required for GC-depth and mismatches-per-cycle\ + \ calculation).\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--split" + alternatives: + - "-S" + description: "Also write statistics to separate files split by tagged field.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--target_regions" + alternatives: + - "-t" + description: "Do stats in these regions only. Tab-delimited file chr,from,to,\ + \ 1-based, inclusive.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sparse" + alternatives: + - "-x" + description: "Suppress outputting IS rows where there are no insertions.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--remove_overlaps" + alternatives: + - "-p" + description: "Remove overlaps of paired-end reads from coverage and base count\ + \ computations.\n" + info: null + direction: "input" + - type: "integer" + name: "--cov_threshold" + alternatives: + - "-g" + description: "Only bases with coverage above this value will be included in the\ + \ target percentage computation. Default: `0`.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--input_fmt_option" + description: "Specify a single input file format option in the form of OPTION\ + \ or OPTION=VALUE.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reference" + description: "Reference sequence FASTA FILE.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output file.\n" + info: null + example: + - "out.txt" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Reports alignment summary statistics for a BAM file." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "statistics" +- "counts" +- "bam" +- "sam" +- "cram" +license: "MIT/Expat" +references: + doi: + - "10.1093/bioinformatics/btp352" + - "10.1093/gigascience/giab008" +links: + repository: "https://github.com/samtools/samtools" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools-stats.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\ + \ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/samtools/samtools_stats/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/samtools/samtools_stats" + executable: "target/nextflow/samtools/samtools_stats/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf new file mode 100644 index 0000000..3bf8623 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf @@ -0,0 +1,4154 @@ +// samtools_stats v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "samtools_stats", + "namespace" : "samtools", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bai", + "description" : "Index file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fasta", + "description" : "Reference file the CRAM was created with.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--coverage", + "alternatives" : [ + "-c" + ], + "description" : "Coverage distribution min;max;step. Default: [1, 1000, 1].\n", + "example" : [ + 1, + 1000, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--remove_dups", + "alternatives" : [ + "-d" + ], + "description" : "Exclude from statistics reads marked as duplicates.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--customized_index_file", + "alternatives" : [ + "-X" + ], + "description" : "Use a customized index file.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--required_flag", + "alternatives" : [ + "-f" + ], + "description" : "Required flag, 0 for unset. See also `samtools flags`. Default: `\\"0\\"`.\n", + "example" : [ + "0" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--filtering_flag", + "alternatives" : [ + "-F" + ], + "description" : "Filtering flag, 0 for unset. See also `samtools flags`. Default: `0`.\n", + "example" : [ + "0" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--GC_depth", + "description" : "The size of GC-depth bins (decreasing bin size increases memory requirement). Default: `20000`.\n", + "example" : [ + 20000.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--insert_size", + "alternatives" : [ + "-i" + ], + "description" : "Maximum insert size. Default: `8000`.\n", + "example" : [ + 8000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--id", + "alternatives" : [ + "-I" + ], + "description" : "Include only listed read group or sample name.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_length", + "alternatives" : [ + "-l" + ], + "description" : "Include in the statistics only reads with the given read length. Default: `-1`.\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--most_inserts", + "alternatives" : [ + "-m" + ], + "description" : "Report only the main part of inserts. Default: `0.99`.\n", + "example" : [ + 0.99 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--split_prefix", + "alternatives" : [ + "-P" + ], + "description" : "Path or string prefix for filepaths output by --split (default is input filename).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--trim_quality", + "alternatives" : [ + "-q" + ], + "description" : "The BWA trimming parameter. Default: `0`.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ref_seq", + "alternatives" : [ + "-r" + ], + "description" : "Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--split", + "alternatives" : [ + "-S" + ], + "description" : "Also write statistics to separate files split by tagged field.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--target_regions", + "alternatives" : [ + "-t" + ], + "description" : "Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sparse", + "alternatives" : [ + "-x" + ], + "description" : "Suppress outputting IS rows where there are no insertions.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--remove_overlaps", + "alternatives" : [ + "-p" + ], + "description" : "Remove overlaps of paired-end reads from coverage and base count computations.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--cov_threshold", + "alternatives" : [ + "-g" + ], + "description" : "Only bases with coverage above this value will be included in the target percentage computation. Default: `0`.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--input_fmt_option", + "description" : "Specify a single input file format option in the form of OPTION or OPTION=VALUE.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reference", + "description" : "Reference sequence FASTA FILE.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output file.\n", + "example" : [ + "out.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Reports alignment summary statistics for a BAM file.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "statistics", + "counts", + "bam", + "sam", + "cram" + ], + "license" : "MIT/Expat", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btp352", + "10.1093/gigascience/giab008" + ] + }, + "links" : { + "repository" : "https://github.com/samtools/samtools", + "homepage" : "https://www.htslib.org/", + "documentation" : "https://www.htslib.org/doc/samtools-stats.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\\\nsed 's#Using ##;s# \\\\([0-9\\\\.]*\\\\)$#: \\\\1#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/samtools/samtools_stats/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/samtools/samtools_stats", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_COVERAGE+x} ]; then echo "${VIASH_PAR_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_coverage='&'#" ; else echo "# par_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_REMOVE_DUPS+x} ]; then echo "${VIASH_PAR_REMOVE_DUPS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_remove_dups='&'#" ; else echo "# par_remove_dups="; fi ) +$( if [ ! -z ${VIASH_PAR_CUSTOMIZED_INDEX_FILE+x} ]; then echo "${VIASH_PAR_CUSTOMIZED_INDEX_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_customized_index_file='&'#" ; else echo "# par_customized_index_file="; fi ) +$( if [ ! -z ${VIASH_PAR_REQUIRED_FLAG+x} ]; then echo "${VIASH_PAR_REQUIRED_FLAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_required_flag='&'#" ; else echo "# par_required_flag="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTERING_FLAG+x} ]; then echo "${VIASH_PAR_FILTERING_FLAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filtering_flag='&'#" ; else echo "# par_filtering_flag="; fi ) +$( if [ ! -z ${VIASH_PAR_GC_DEPTH+x} ]; then echo "${VIASH_PAR_GC_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_GC_depth='&'#" ; else echo "# par_GC_depth="; fi ) +$( if [ ! -z ${VIASH_PAR_INSERT_SIZE+x} ]; then echo "${VIASH_PAR_INSERT_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_insert_size='&'#" ; else echo "# par_insert_size="; fi ) +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_LENGTH+x} ]; then echo "${VIASH_PAR_READ_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_length='&'#" ; else echo "# par_read_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MOST_INSERTS+x} ]; then echo "${VIASH_PAR_MOST_INSERTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_most_inserts='&'#" ; else echo "# par_most_inserts="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT_PREFIX+x} ]; then echo "${VIASH_PAR_SPLIT_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split_prefix='&'#" ; else echo "# par_split_prefix="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_QUALITY+x} ]; then echo "${VIASH_PAR_TRIM_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_quality='&'#" ; else echo "# par_trim_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_REF_SEQ+x} ]; then echo "${VIASH_PAR_REF_SEQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref_seq='&'#" ; else echo "# par_ref_seq="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT+x} ]; then echo "${VIASH_PAR_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split='&'#" ; else echo "# par_split="; fi ) +$( if [ ! -z ${VIASH_PAR_TARGET_REGIONS+x} ]; then echo "${VIASH_PAR_TARGET_REGIONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_target_regions='&'#" ; else echo "# par_target_regions="; fi ) +$( if [ ! -z ${VIASH_PAR_SPARSE+x} ]; then echo "${VIASH_PAR_SPARSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sparse='&'#" ; else echo "# par_sparse="; fi ) +$( if [ ! -z ${VIASH_PAR_REMOVE_OVERLAPS+x} ]; then echo "${VIASH_PAR_REMOVE_OVERLAPS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_remove_overlaps='&'#" ; else echo "# par_remove_overlaps="; fi ) +$( if [ ! -z ${VIASH_PAR_COV_THRESHOLD+x} ]; then echo "${VIASH_PAR_COV_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cov_threshold='&'#" ; else echo "# par_cov_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FMT_OPTION+x} ]; then echo "${VIASH_PAR_INPUT_FMT_OPTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_fmt_option='&'#" ; else echo "# par_input_fmt_option="; fi ) +$( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "${VIASH_PAR_REFERENCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reference='&'#" ; else echo "# par_reference="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e + +[[ "\\$par_remove_dups" == "false" ]] && unset par_remove_dups +[[ "\\$par_customized_index_file" == "false" ]] && unset par_customized_index_file +[[ "\\$par_sparse" == "false" ]] && unset par_sparse +[[ "\\$par_remove_overlaps" == "false" ]] && unset par_remove_overlaps + +# change the coverage input from X;X;X to X,X,X +par_coverage=\\$(echo "\\$par_coverage" | tr ';' ',') + +samtools stats \\\\ + \\${par_coverage:+-c "\\$par_coverage"} \\\\ + \\${par_remove_dups:+-d} \\\\ + \\${par_required_flag:+-f "\\$par_required_flag"} \\\\ + \\${par_filtering_flag:+-F "\\$par_filtering_flag"} \\\\ + \\${par_GC_depth:+--GC-depth "\\$par_GC_depth"} \\\\ + \\${par_insert_size:+-i "\\$par_insert_size"} \\\\ + \\${par_id:+-I "\\$par_id"} \\\\ + \\${par_read_length:+-l "\\$par_read_length"} \\\\ + \\${par_most_inserts:+-m "\\$par_most_inserts"} \\\\ + \\${par_split_prefix:+-P "\\$par_split_prefix"} \\\\ + \\${par_trim_quality:+-q "\\$par_trim_quality"} \\\\ + \\${par_ref_seq:+-r "\\$par_ref_seq"} \\\\ + \\${par_split:+-S "\\$par_split"} \\\\ + \\${par_target_regions:+-t "\\$par_target_regions"} \\\\ + \\${par_sparse:+-x} \\\\ + \\${par_remove_overlaps:+-p} \\\\ + \\${par_cov_threshold:+-g "\\$par_cov_threshold"} \\\\ + \\${par_input_fmt_option:+-O "\\$par_input_fmt_option"} \\\\ + \\${par_reference:+-R "\\$par_reference"} \\\\ + "\\$par_input" \\\\ + > "\\$par_output" + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/samtools/samtools_stats", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow.config new file mode 100644 index 0000000..1516180 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'samtools/samtools_stats' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Reports alignment summary statistics for a BAM file.' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow_schema.json new file mode 100644 index 0000000..4e161ad --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/nextflow_schema.json @@ -0,0 +1,319 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "samtools_stats", +"description": "Reports alignment summary statistics for a BAM file.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input file", + "help_text": "Type: `file`, required. Input file.\n" + + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`. Index file", + "help_text": "Type: `file`. Index file.\n" + + } + + + , + "fasta": { + "type": + "string", + "description": "Type: `file`. Reference file the CRAM was created with", + "help_text": "Type: `file`. Reference file the CRAM was created with.\n" + + } + + + , + "coverage": { + "type": + "string", + "description": "Type: List of `integer`, example: `1;1000;1`, multiple_sep: `\";\"`. Coverage distribution min;max;step", + "help_text": "Type: List of `integer`, example: `1;1000;1`, multiple_sep: `\";\"`. Coverage distribution min;max;step. Default: [1, 1000, 1].\n" + + } + + + , + "remove_dups": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Exclude from statistics reads marked as duplicates", + "help_text": "Type: `boolean_true`, default: `false`. Exclude from statistics reads marked as duplicates.\n" + , + "default":false + } + + + , + "customized_index_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use a customized index file", + "help_text": "Type: `boolean_true`, default: `false`. Use a customized index file.\n" + , + "default":false + } + + + , + "required_flag": { + "type": + "string", + "description": "Type: `string`, example: `0`. Required flag, 0 for unset", + "help_text": "Type: `string`, example: `0`. Required flag, 0 for unset. See also `samtools flags`. Default: `\"0\"`.\n" + + } + + + , + "filtering_flag": { + "type": + "string", + "description": "Type: `string`, example: `0`. Filtering flag, 0 for unset", + "help_text": "Type: `string`, example: `0`. Filtering flag, 0 for unset. See also `samtools flags`. Default: `0`.\n" + + } + + + , + "GC_depth": { + "type": + "number", + "description": "Type: `double`, example: `20000.0`. The size of GC-depth bins (decreasing bin size increases memory requirement)", + "help_text": "Type: `double`, example: `20000.0`. The size of GC-depth bins (decreasing bin size increases memory requirement). Default: `20000`.\n" + + } + + + , + "insert_size": { + "type": + "integer", + "description": "Type: `integer`, example: `8000`. Maximum insert size", + "help_text": "Type: `integer`, example: `8000`. Maximum insert size. Default: `8000`.\n" + + } + + + , + "id": { + "type": + "string", + "description": "Type: `string`. Include only listed read group or sample name", + "help_text": "Type: `string`. Include only listed read group or sample name.\n" + + } + + + , + "read_length": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. Include in the statistics only reads with the given read length", + "help_text": "Type: `integer`, example: `-1`. Include in the statistics only reads with the given read length. Default: `-1`.\n" + + } + + + , + "most_inserts": { + "type": + "number", + "description": "Type: `double`, example: `0.99`. Report only the main part of inserts", + "help_text": "Type: `double`, example: `0.99`. Report only the main part of inserts. Default: `0.99`.\n" + + } + + + , + "split_prefix": { + "type": + "string", + "description": "Type: `string`. Path or string prefix for filepaths output by --split (default is input filename)", + "help_text": "Type: `string`. Path or string prefix for filepaths output by --split (default is input filename).\n" + + } + + + , + "trim_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. The BWA trimming parameter", + "help_text": "Type: `integer`, example: `0`. The BWA trimming parameter. Default: `0`.\n" + + } + + + , + "ref_seq": { + "type": + "string", + "description": "Type: `file`. Reference sequence (required for GC-depth and mismatches-per-cycle calculation)", + "help_text": "Type: `file`. Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n" + + } + + + , + "split": { + "type": + "string", + "description": "Type: `string`. Also write statistics to separate files split by tagged field", + "help_text": "Type: `string`. Also write statistics to separate files split by tagged field.\n" + + } + + + , + "target_regions": { + "type": + "string", + "description": "Type: `file`. Do stats in these regions only", + "help_text": "Type: `file`. Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n" + + } + + + , + "sparse": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Suppress outputting IS rows where there are no insertions", + "help_text": "Type: `boolean_true`, default: `false`. Suppress outputting IS rows where there are no insertions.\n" + , + "default":false + } + + + , + "remove_overlaps": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Remove overlaps of paired-end reads from coverage and base count computations", + "help_text": "Type: `boolean_true`, default: `false`. Remove overlaps of paired-end reads from coverage and base count computations.\n" + , + "default":false + } + + + , + "cov_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Only bases with coverage above this value will be included in the target percentage computation", + "help_text": "Type: `integer`, example: `0`. Only bases with coverage above this value will be included in the target percentage computation. Default: `0`.\n" + + } + + + , + "input_fmt_option": { + "type": + "string", + "description": "Type: `string`. Specify a single input file format option in the form of OPTION or OPTION=VALUE", + "help_text": "Type: `string`. Specify a single input file format option in the form of OPTION or OPTION=VALUE.\n" + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`. Reference sequence FASTA FILE", + "help_text": "Type: `file`. Reference sequence FASTA FILE.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.txt`, example: `out.txt`. Output file", + "help_text": "Type: `file`, required, default: `$id.$key.output.txt`, example: `out.txt`. Output file.\n" + , + "default":"$id.$key.output.txt" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/.config.vsh.yaml new file mode 100644 index 0000000..69a46ff --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/.config.vsh.yaml @@ -0,0 +1,635 @@ +name: "sortmerna" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "boolean_true" + name: "--paired" + description: "Reads are paired-end. If a single reads file is provided, use this\ + \ option \nto indicate the file contains interleaved paired reads when neither\n\ + 'paired_in' | 'paired_out' | 'out2' | 'sout' are specified.\n" + info: null + direction: "input" + - type: "file" + name: "--input" + description: "Input fastq" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--ref" + description: "Reference fasta file(s) for rRNA database." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--log" + description: "Sortmerna log file." + info: null + example: + - "$id.sortmerna.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output" + alternatives: + - "--aligned" + description: "Directory and file prefix for aligned output. The appropriate extension:\ + \ \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf 'dir' is not specified,\ + \ the output is created in the WORKDIR/out/.\nIf 'pfx' is not specified, the\ + \ prefix 'aligned' is used.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--other" + description: "Create Non-aligned reads output file with this path/prefix. Must\ + \ be used with fastx." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "string" + name: "--kvdb" + description: "Path to directory of the key-value database file, used for storing\ + \ the alignment results." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--idx_dir" + description: "Path to the directory for storing the reference index files." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--readb" + description: "Path to the directory for storing pre-processed reads." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--fastx" + description: "Output aligned reads into FASTA/FASTQ file" + info: null + direction: "input" + - type: "boolean_true" + name: "--sam" + description: "Output SAM alignment for aligned reads." + info: null + direction: "input" + - type: "boolean_true" + name: "--sq" + description: "Add SQ tags to the SAM file" + info: null + direction: "input" + - type: "string" + name: "--blast" + description: "Blast options:\n* '0' - pairwise\n* '1' \ + \ - tabular(Blast - m 8 format)\n* '1 cigar' - tabular\ + \ + column for CIGAR\n* '1 cigar qcov' - tabular + columns for CIGAR\ + \ and query coverage\n* '1 cigar qcov qstrand' - tabular + columns for CIGAR,\ + \ query coverage and strand\n" + info: null + required: false + choices: + - "0" + - "1" + - "1 cigar" + - "1 cigar qcov" + - "1 cigar qcov qstrand" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_alignments" + description: "Report first INT alignments per read reaching E-value. If Int =\ + \ 0, all alignments will be output. Default: '0'\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_lis" + description: "search all alignments having the first INT longest LIS. LIS stands\ + \ for Longest Increasing Subsequence, it is\ncomputed using seeds' positions\ + \ to expand hits into longer matches prior to Smith-Waterman alignment. Default:\ + \ '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--print_all_reads" + description: "output null alignment strings for non-aligned reads to SAM and/or\ + \ BLAST tabular files." + info: null + direction: "input" + - type: "boolean_true" + name: "--paired_in" + description: "In the case where a pair of reads is aligned with a score above\ + \ the threshold, the output of the reads is controlled\nby the following options:\n\ + * --paired_in and --paired_out are both false: Only one read per pair is output\ + \ to the aligned fasta file.\n* --paired_in is true and --paired_out is false:\ + \ Both reads of the pair are output to the aligned fasta file.\n* --paired_in\ + \ is false and --paired_out is true: Both reads are output the the other fasta\ + \ file (if it is specified).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--paired_out" + description: "See description of --paired_in." + info: null + direction: "input" + - type: "boolean_true" + name: "--out2" + description: "Output paired reads into separate files. Must be used with '--fastx'.\ + \ If a single reads file is provided, this options\nimplies interleaved paired\ + \ reads. When used with 'sout', four (4) output files for aligned reads will\ + \ be generated:\n'aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd,\ + \ aligned-singleton-rev'. If 'other' option is also used,\neight (8) output\ + \ files will be generated.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--sout" + description: "Separate paired and singleton aligned reads. Must be used with '--fastx'.\ + \ If a single reads file is provided,\nthis options implies interleaved paired\ + \ reads. Cannot be used with '--paired_in' or '--paired_out'.\n" + info: null + direction: "input" + - type: "string" + name: "--zip_out" + description: "Compress the output files. The possible values are: \n* '1/true/t/yes/y'\n\ + * '0/false/f/no/n'\n*'-1' (the same format as input - default)\nThe values are\ + \ Not case sensitive.\n" + info: null + example: + - "-1" + required: false + choices: + - "1" + - "true" + - "t" + - "yes" + - "y" + - "0" + - "false" + - "f" + - "no" + - "n" + - "-1" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--match" + description: "Smith-Waterman score for a match (positive integer). Default: '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mismatch" + description: "Smith-Waterman penalty for a mismatch (negative integer). Default:\ + \ '-3'.\n" + info: null + example: + - -3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gap_open" + description: "Smith-Waterman penalty for introducing a gap (positive integer).\ + \ Default: '5'.\n" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gap_ext" + description: "Smith-Waterman penalty for extending a gap (positive integer). Default:\ + \ '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--N" + description: "Smith-Waterman penalty for ambiguous letters (N's) scored as --mismatch.\ + \ Default: '-1'.\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--a" + description: "Number of threads to use. Default: '1'.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--e" + description: "E-value threshold. Default: '1'.\n" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--F" + description: "Search only the forward strand." + info: null + direction: "input" + - type: "boolean_true" + name: "--R" + description: "Search only the reverse-complementary strand." + info: null + direction: "input" + - type: "integer" + name: "--num_alignment" + description: "Report first INT alignments per read reaching E-value (--num_alignments\ + \ 0 signifies all alignments will be output).\nDefault: '-1'\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--best" + description: "Report INT best alignments per read reaching E-value by searching\ + \ --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments\ + \ will be searched) Default: '1'.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--verbose" + alternatives: + - "-v" + description: "Verbose output." + info: null + direction: "input" +- name: "OTU picking options" + arguments: + - type: "double" + name: "--id" + description: "%id similarity threshold (the alignment must still pass the E-value\ + \ threshold). Default: '0.97'.\n" + info: null + example: + - 0.97 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--coverage" + description: "%query coverage threshold (the alignment must still pass the E-value\ + \ threshold). Default: '0.97'.\n" + info: null + example: + - 0.97 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--de_novo" + description: "FASTA/FASTQ file for reads matching database < %id off (set using\ + \ --id) and < %cov (set using --coverage)\n(alignment must still pass the E-value\ + \ threshold).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--otu_map" + description: "Output OTU map (input to QIIME's make_otu_table.py).\n" + info: null + direction: "input" +- name: "Advanced options" + arguments: + - type: "integer" + name: "--num_seed" + description: "Number of seeds matched before searching for candidate LIS. Default:\ + \ '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--passes" + description: "Three intervals at which to place the seed on the read L,L/2,3 (L\ + \ is the seed length set in ./indexdb_rna).\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--edge" + description: "The number (or percentage if followed by %) of nucleotides to add\ + \ to each edge of the alignment region on the\nreference sequence before performing\ + \ Smith-Waterman alignment. Default: '4'.\n" + info: null + example: + - "4" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--full_search" + description: "Search for all 0-error and 1-error seed off matches in the index\ + \ rather than stopping after finding a 0-error match\n(<1% gain in sensitivity\ + \ with up four-fold decrease in speed).\n" + info: null + direction: "input" +- name: "Indexing Options" + arguments: + - type: "integer" + name: "--index" + description: "Create index files for the reference database. By default when this\ + \ option is not used, the program checks the\nreference index and builds it\ + \ if not already existing.\nThis can be changed by using '-index' as follows:\n\ + * '-index 0' - skip indexing. If the index does not exist, the program will\ + \ terminate\n and warn to build the index prior performing\ + \ the alignment\n* '-index 1' - only perform the indexing and terminate\n* '-index\ + \ 2' - the default behaviour, the same as when not using this option at all\n" + info: null + example: + - 2 + required: false + choices: + - 0 + - 1 + - 2 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "-L" + description: "Indexing seed length. Default: '18'\n" + info: null + example: + - 18.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--interval" + description: "Index every Nth L-mer in the reference database. Default: '1'\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_pos" + description: "Maximum number of positions to store for each unique L-mer. Set\ + \ to 0 to store all positions. Default: '1000'\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Local sequence alignment tool for filtering, mapping and clustering.\ + \ The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic\ + \ data.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "sort" +- "mRNA" +- "rRNA" +- "alignment" +- "filtering" +- "mapping" +- "clustering" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts611" +links: + repository: "https://github.com/sortmerna/sortmerna" + homepage: "https://sortmerna.readthedocs.io/en/latest/" + documentation: "https://sortmerna.readthedocs.io/en/latest/manual4.0.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "echo SortMeRNA: `sortmerna --version | sed -n 's/.*version \\([0-9]\\+\\.[0-9]\\\ + +\\.[0-9]\\+\\).*/\\1/p'`\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/sortmerna/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/sortmerna" + executable: "target/nextflow/sortmerna/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/main.nf new file mode 100644 index 0000000..bbd9642 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/main.nf @@ -0,0 +1,4404 @@ +// sortmerna v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "sortmerna", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "Reads are paired-end. If a single reads file is provided, use this option \nto indicate the file contains interleaved paired reads when neither\n'paired_in' | 'paired_out' | 'out2' | 'sout' are specified.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Input fastq", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ref", + "description" : "Reference fasta file(s) for rRNA database.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ribo_database_manifest", + "description" : "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--log", + "description" : "Sortmerna log file.", + "example" : [ + "$id.sortmerna.log" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--aligned" + ], + "description" : "Directory and file prefix for aligned output. The appropriate extension: \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf 'dir' is not specified, the output is created in the WORKDIR/out/.\nIf 'pfx' is not specified, the prefix 'aligned' is used.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--other", + "description" : "Create Non-aligned reads output file with this path/prefix. Must be used with fastx.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "string", + "name" : "--kvdb", + "description" : "Path to directory of the key-value database file, used for storing the alignment results.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--idx_dir", + "description" : "Path to the directory for storing the reference index files.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--readb", + "description" : "Path to the directory for storing pre-processed reads.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--fastx", + "description" : "Output aligned reads into FASTA/FASTQ file", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sam", + "description" : "Output SAM alignment for aligned reads.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sq", + "description" : "Add SQ tags to the SAM file", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--blast", + "description" : "Blast options:\n* '0' - pairwise\n* '1' - tabular(Blast - m 8 format)\n* '1 cigar' - tabular + column for CIGAR\n* '1 cigar qcov' - tabular + columns for CIGAR and query coverage\n* '1 cigar qcov qstrand' - tabular + columns for CIGAR, query coverage and strand\n", + "required" : false, + "choices" : [ + "0", + "1", + "1 cigar", + "1 cigar qcov", + "1 cigar qcov qstrand" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_alignments", + "description" : "Report first INT alignments per read reaching E-value. If Int = 0, all alignments will be output. Default: '0'\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_lis", + "description" : "search all alignments having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is\ncomputed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--print_all_reads", + "description" : "output null alignment strings for non-aligned reads to SAM and/or BLAST tabular files.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--paired_in", + "description" : "In the case where a pair of reads is aligned with a score above the threshold, the output of the reads is controlled\nby the following options:\n* --paired_in and --paired_out are both false: Only one read per pair is output to the aligned fasta file.\n* --paired_in is true and --paired_out is false: Both reads of the pair are output to the aligned fasta file.\n* --paired_in is false and --paired_out is true: Both reads are output the the other fasta file (if it is specified).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--paired_out", + "description" : "See description of --paired_in.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--out2", + "description" : "Output paired reads into separate files. Must be used with '--fastx'. If a single reads file is provided, this options\nimplies interleaved paired reads. When used with 'sout', four (4) output files for aligned reads will be generated:\n'aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd, aligned-singleton-rev'. If 'other' option is also used,\neight (8) output files will be generated.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sout", + "description" : "Separate paired and singleton aligned reads. Must be used with '--fastx'. If a single reads file is provided,\nthis options implies interleaved paired reads. Cannot be used with '--paired_in' or '--paired_out'.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--zip_out", + "description" : "Compress the output files. The possible values are: \n* '1/true/t/yes/y'\n* '0/false/f/no/n'\n*'-1' (the same format as input - default)\nThe values are Not case sensitive.\n", + "example" : [ + "-1" + ], + "required" : false, + "choices" : [ + "1", + "true", + "t", + "yes", + "y", + "0", + "false", + "f", + "no", + "n", + "-1" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--match", + "description" : "Smith-Waterman score for a match (positive integer). Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mismatch", + "description" : "Smith-Waterman penalty for a mismatch (negative integer). Default: '-3'.\n", + "example" : [ + -3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gap_open", + "description" : "Smith-Waterman penalty for introducing a gap (positive integer). Default: '5'.\n", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gap_ext", + "description" : "Smith-Waterman penalty for extending a gap (positive integer). Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--N", + "description" : "Smith-Waterman penalty for ambiguous letters (N's) scored as --mismatch. Default: '-1'.\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--a", + "description" : "Number of threads to use. Default: '1'.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--e", + "description" : "E-value threshold. Default: '1'.\n", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--F", + "description" : "Search only the forward strand.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--R", + "description" : "Search only the reverse-complementary strand.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_alignment", + "description" : "Report first INT alignments per read reaching E-value (--num_alignments 0 signifies all alignments will be output).\nDefault: '-1'\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--best", + "description" : "Report INT best alignments per read reaching E-value by searching --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments will be searched) Default: '1'.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--verbose", + "alternatives" : [ + "-v" + ], + "description" : "Verbose output.", + "direction" : "input" + } + ] + }, + { + "name" : "OTU picking options", + "arguments" : [ + { + "type" : "double", + "name" : "--id", + "description" : "%id similarity threshold (the alignment must still pass the E-value threshold). Default: '0.97'.\n", + "example" : [ + 0.97 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--coverage", + "description" : "%query coverage threshold (the alignment must still pass the E-value threshold). Default: '0.97'.\n", + "example" : [ + 0.97 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--de_novo", + "description" : "FASTA/FASTQ file for reads matching database < %id off (set using --id) and < %cov (set using --coverage)\n(alignment must still pass the E-value threshold).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--otu_map", + "description" : "Output OTU map (input to QIIME's make_otu_table.py).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Advanced options", + "arguments" : [ + { + "type" : "integer", + "name" : "--num_seed", + "description" : "Number of seeds matched before searching for candidate LIS. Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--passes", + "description" : "Three intervals at which to place the seed on the read L,L/2,3 (L is the seed length set in ./indexdb_rna).\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--edge", + "description" : "The number (or percentage if followed by %) of nucleotides to add to each edge of the alignment region on the\nreference sequence before performing Smith-Waterman alignment. Default: '4'.\n", + "example" : [ + "4" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--full_search", + "description" : "Search for all 0-error and 1-error seed off matches in the index rather than stopping after finding a 0-error match\n(<1% gain in sensitivity with up four-fold decrease in speed).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Indexing Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--index", + "description" : "Create index files for the reference database. By default when this option is not used, the program checks the\nreference index and builds it if not already existing.\nThis can be changed by using '-index' as follows:\n* '-index 0' - skip indexing. If the index does not exist, the program will terminate\n and warn to build the index prior performing the alignment\n* '-index 1' - only perform the indexing and terminate\n* '-index 2' - the default behaviour, the same as when not using this option at all\n", + "example" : [ + 2 + ], + "required" : false, + "choices" : [ + 0, + 1, + 2 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "-L", + "description" : "Indexing seed length. Default: '18'\n", + "example" : [ + 18.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--interval", + "description" : "Index every Nth L-mer in the reference database. Default: '1'\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_pos", + "description" : "Maximum number of positions to store for each unique L-mer. Set to 0 to store all positions. Default: '1000'\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Local sequence alignment tool for filtering, mapping and clustering. The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic data.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "sort", + "mRNA", + "rRNA", + "alignment", + "filtering", + "mapping", + "clustering" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts611" + ] + }, + "links" : { + "repository" : "https://github.com/sortmerna/sortmerna", + "homepage" : "https://sortmerna.readthedocs.io/en/latest/", + "documentation" : "https://sortmerna.readthedocs.io/en/latest/manual4.0.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "echo SortMeRNA: `sortmerna --version | sed -n 's/.*version \\\\([0-9]\\\\+\\\\.[0-9]\\\\+\\\\.[0-9]\\\\+\\\\).*/\\\\1/p'`\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/sortmerna/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/sortmerna", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REF+x} ]; then echo "${VIASH_PAR_REF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref='&'#" ; else echo "# par_ref="; fi ) +$( if [ ! -z ${VIASH_PAR_RIBO_DATABASE_MANIFEST+x} ]; then echo "${VIASH_PAR_RIBO_DATABASE_MANIFEST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ribo_database_manifest='&'#" ; else echo "# par_ribo_database_manifest="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OTHER+x} ]; then echo "${VIASH_PAR_OTHER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_other='&'#" ; else echo "# par_other="; fi ) +$( if [ ! -z ${VIASH_PAR_KVDB+x} ]; then echo "${VIASH_PAR_KVDB}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kvdb='&'#" ; else echo "# par_kvdb="; fi ) +$( if [ ! -z ${VIASH_PAR_IDX_DIR+x} ]; then echo "${VIASH_PAR_IDX_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_idx_dir='&'#" ; else echo "# par_idx_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_READB+x} ]; then echo "${VIASH_PAR_READB}" | sed "s#'#'\\"'\\"'#g;s#.*#par_readb='&'#" ; else echo "# par_readb="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTX+x} ]; then echo "${VIASH_PAR_FASTX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastx='&'#" ; else echo "# par_fastx="; fi ) +$( if [ ! -z ${VIASH_PAR_SAM+x} ]; then echo "${VIASH_PAR_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam='&'#" ; else echo "# par_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_SQ+x} ]; then echo "${VIASH_PAR_SQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sq='&'#" ; else echo "# par_sq="; fi ) +$( if [ ! -z ${VIASH_PAR_BLAST+x} ]; then echo "${VIASH_PAR_BLAST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_blast='&'#" ; else echo "# par_blast="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_ALIGNMENTS+x} ]; then echo "${VIASH_PAR_NUM_ALIGNMENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_alignments='&'#" ; else echo "# par_num_alignments="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_LIS+x} ]; then echo "${VIASH_PAR_MIN_LIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_lis='&'#" ; else echo "# par_min_lis="; fi ) +$( if [ ! -z ${VIASH_PAR_PRINT_ALL_READS+x} ]; then echo "${VIASH_PAR_PRINT_ALL_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_print_all_reads='&'#" ; else echo "# par_print_all_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED_IN+x} ]; then echo "${VIASH_PAR_PAIRED_IN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired_in='&'#" ; else echo "# par_paired_in="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED_OUT+x} ]; then echo "${VIASH_PAR_PAIRED_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired_out='&'#" ; else echo "# par_paired_out="; fi ) +$( if [ ! -z ${VIASH_PAR_OUT2+x} ]; then echo "${VIASH_PAR_OUT2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_out2='&'#" ; else echo "# par_out2="; fi ) +$( if [ ! -z ${VIASH_PAR_SOUT+x} ]; then echo "${VIASH_PAR_SOUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sout='&'#" ; else echo "# par_sout="; fi ) +$( if [ ! -z ${VIASH_PAR_ZIP_OUT+x} ]; then echo "${VIASH_PAR_ZIP_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip_out='&'#" ; else echo "# par_zip_out="; fi ) +$( if [ ! -z ${VIASH_PAR_MATCH+x} ]; then echo "${VIASH_PAR_MATCH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_match='&'#" ; else echo "# par_match="; fi ) +$( if [ ! -z ${VIASH_PAR_MISMATCH+x} ]; then echo "${VIASH_PAR_MISMATCH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mismatch='&'#" ; else echo "# par_mismatch="; fi ) +$( if [ ! -z ${VIASH_PAR_GAP_OPEN+x} ]; then echo "${VIASH_PAR_GAP_OPEN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gap_open='&'#" ; else echo "# par_gap_open="; fi ) +$( if [ ! -z ${VIASH_PAR_GAP_EXT+x} ]; then echo "${VIASH_PAR_GAP_EXT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gap_ext='&'#" ; else echo "# par_gap_ext="; fi ) +$( if [ ! -z ${VIASH_PAR_N+x} ]; then echo "${VIASH_PAR_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_N='&'#" ; else echo "# par_N="; fi ) +$( if [ ! -z ${VIASH_PAR_A+x} ]; then echo "${VIASH_PAR_A}" | sed "s#'#'\\"'\\"'#g;s#.*#par_a='&'#" ; else echo "# par_a="; fi ) +$( if [ ! -z ${VIASH_PAR_E+x} ]; then echo "${VIASH_PAR_E}" | sed "s#'#'\\"'\\"'#g;s#.*#par_e='&'#" ; else echo "# par_e="; fi ) +$( if [ ! -z ${VIASH_PAR_F+x} ]; then echo "${VIASH_PAR_F}" | sed "s#'#'\\"'\\"'#g;s#.*#par_F='&'#" ; else echo "# par_F="; fi ) +$( if [ ! -z ${VIASH_PAR_R+x} ]; then echo "${VIASH_PAR_R}" | sed "s#'#'\\"'\\"'#g;s#.*#par_R='&'#" ; else echo "# par_R="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_ALIGNMENT+x} ]; then echo "${VIASH_PAR_NUM_ALIGNMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_alignment='&'#" ; else echo "# par_num_alignment="; fi ) +$( if [ ! -z ${VIASH_PAR_BEST+x} ]; then echo "${VIASH_PAR_BEST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_best='&'#" ; else echo "# par_best="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_COVERAGE+x} ]; then echo "${VIASH_PAR_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_coverage='&'#" ; else echo "# par_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_DE_NOVO+x} ]; then echo "${VIASH_PAR_DE_NOVO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_de_novo='&'#" ; else echo "# par_de_novo="; fi ) +$( if [ ! -z ${VIASH_PAR_OTU_MAP+x} ]; then echo "${VIASH_PAR_OTU_MAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_otu_map='&'#" ; else echo "# par_otu_map="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_SEED+x} ]; then echo "${VIASH_PAR_NUM_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_seed='&'#" ; else echo "# par_num_seed="; fi ) +$( if [ ! -z ${VIASH_PAR_PASSES+x} ]; then echo "${VIASH_PAR_PASSES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_passes='&'#" ; else echo "# par_passes="; fi ) +$( if [ ! -z ${VIASH_PAR_EDGE+x} ]; then echo "${VIASH_PAR_EDGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_edge='&'#" ; else echo "# par_edge="; fi ) +$( if [ ! -z ${VIASH_PAR_FULL_SEARCH+x} ]; then echo "${VIASH_PAR_FULL_SEARCH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_full_search='&'#" ; else echo "# par_full_search="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_L+x} ]; then echo "${VIASH_PAR_L}" | sed "s#'#'\\"'\\"'#g;s#.*#par_L='&'#" ; else echo "# par_L="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERVAL+x} ]; then echo "${VIASH_PAR_INTERVAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interval='&'#" ; else echo "# par_interval="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_POS+x} ]; then echo "${VIASH_PAR_MAX_POS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_pos='&'#" ; else echo "# par_max_pos="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + +unset_if_false=( par_fastx par_sq par_fastx par_print_all_reads par_paired_in par_paired_out + par_F par_R par_verbose par_de_novo par_otu_map par_full_search par_out2 + par_sout par_sam par_paired ) + + +for var in "\\${unset_if_false[@]}"; do + if [ "\\${!var}" == "false" ]; then + unset \\$var + fi +done + +reads=() +IFS=";" read -ra input <<< "\\$par_input" +if [ "\\${#input[@]}" -eq 2 ]; then + reads="--reads \\${input[0]} --reads \\${input[1]}" + # set paired to true in case it's not + par_paired=true +else + reads="--reads \\${input[0]}" + par_paired=false +fi + +refs=() + +# check if references are input normally or through a manifest file +if [[ ! -z "\\$par_ribo_database_manifest" ]]; then + while IFS= read -r path || [[ -n \\$path ]]; do + refs=\\$refs" --ref \\$path" + done < \\$par_ribo_database_manifest + +elif [[ ! -z "\\$par_ref" ]]; then + IFS=";" read -ra ref <<< "\\$par_ref" + for i in "\\${ref[@]}" + do + refs+="-ref \\$i " + done + +else + echo "No reference fasta file(s) provided" + exit 1 +fi + + +sortmerna \\\\ + \\$refs \\\\ + \\$reads \\\\ + --workdir . \\\\ + \\${par_output:+--aligned "\\${par_output}"} \\\\ + \\${par_fastx:+--fastx} \\\\ + \\${par_other:+--other "\\${par_other}"} \\\\ + \\${par_kvdb:+--kvdb "\\${par_kvdb}"} \\\\ + \\${par_idx_dir:+--idx-dir "\\${par_idx_dir}"} \\\\ + \\${par_readb:+--readb "\\${par_readb}"} \\\\ + \\${par_sam:+--sam} \\\\ + \\${par_sq:+--sq} \\\\ + \\${par_blast:+--blast "\\${par_blast}"} \\\\ + \\${par_num_alignments:+--num_alignments "\\${par_num_alignments}"} \\\\ + \\${par_min_lis:+--min_lis "\\${par_min_lis}"} \\\\ + \\${par_print_all_reads:+--print_all_reads} \\\\ + \\${par_paired_in:+--paired_in} \\\\ + \\${par_paired_out:+--paired_out} \\\\ + \\${par_out2:+--out2} \\\\ + \\${par_sout:+--sout} \\\\ + \\${par_zip_out:+--zip-out "\\${par_zip_out}"} \\\\ + \\${par_match:+--match "\\${par_match}"} \\\\ + \\${par_mismatch:+--mismatch "\\${par_mismatch}"} \\\\ + \\${par_gap_open:+--gap_open "\\${par_gap_open}"} \\\\ + \\${par_gap_ext:+--gap_ext "\\${par_gap_ext}"} \\\\ + \\${par_N:+-N "\\${par_N}"} \\\\ + \\${par_a:+-a "\\${par_a}"} \\\\ + \\${par_e:+-e "\\${par_e}"} \\\\ + \\${par_F:+-F} \\\\ + \\${par_R:+-R} \\\\ + \\${par_num_alignment:+--num_alignment "\\${par_num_alignment}"} \\\\ + \\${par_best:+--best "\\${par_best}"} \\\\ + \\${par_verbose:+--verbose} \\\\ + \\${par_id:+--id "\\${par_id}"} \\\\ + \\${par_coverage:+--coverage "\\${par_coverage}"} \\\\ + \\${par_de_novo:+--de_novo} \\\\ + \\${par_otu_map:+--otu_map} \\\\ + \\${par_num_seed:+--num_seed "\\${par_num_seed}"} \\\\ + \\${par_passes:+--passes "\\${par_passes}"} \\\\ + \\${par_edge:+--edge "\\${par_edge}"} \\\\ + \\${par_full_search:+--full_search} \\\\ + \\${par_index:+--index "\\${par_index}"} \\\\ + \\${par_L:+-L \\$par_L} \\\\ + \\${par_interval:+--interval "\\${par_interval}"} \\\\ + \\${par_max_pos:+--max_pos "\\${par_max_pos}"} + + +if [ ! -z \\$par_log ]; then + mv "\\${par_output}.log" \\$par_log +fi + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/sortmerna", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow.config new file mode 100644 index 0000000..a1fa7c2 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'sortmerna' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Local sequence alignment tool for filtering, mapping and clustering. The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic data.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow_schema.json new file mode 100644 index 0000000..f3c2fcd --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/nextflow_schema.json @@ -0,0 +1,614 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "sortmerna", +"description": "Local sequence alignment tool for filtering, mapping and clustering. The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic data.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Reads are paired-end", + "help_text": "Type: `boolean_true`, default: `false`. Reads are paired-end. If a single reads file is provided, use this option \nto indicate the file contains interleaved paired reads when neither\n\u0027paired_in\u0027 | \u0027paired_out\u0027 | \u0027out2\u0027 | \u0027sout\u0027 are specified.\n" + , + "default":false + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Input fastq", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input fastq" + + } + + + , + "ref": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Reference fasta file(s) for rRNA database", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Reference fasta file(s) for rRNA database." + + } + + + , + "ribo_database_manifest": { + "type": + "string", + "description": "Type: `file`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA", + "help_text": "Type: `file`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log.log`, example: `$id.sortmerna.log`. Sortmerna log file", + "help_text": "Type: `file`, default: `$id.$key.log.log`, example: `$id.sortmerna.log`. Sortmerna log file." + , + "default":"$id.$key.log.log" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output`. Directory and file prefix for aligned output", + "help_text": "Type: `file`, default: `$id.$key.output`. Directory and file prefix for aligned output. The appropriate extension: \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf \u0027dir\u0027 is not specified, the output is created in the WORKDIR/out/.\nIf \u0027pfx\u0027 is not specified, the prefix \u0027aligned\u0027 is used.\n" + , + "default":"$id.$key.output" + } + + + , + "other": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.other`. Create Non-aligned reads output file with this path/prefix", + "help_text": "Type: `file`, default: `$id.$key.other`. Create Non-aligned reads output file with this path/prefix. Must be used with fastx." + , + "default":"$id.$key.other" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "kvdb": { + "type": + "string", + "description": "Type: `string`. Path to directory of the key-value database file, used for storing the alignment results", + "help_text": "Type: `string`. Path to directory of the key-value database file, used for storing the alignment results." + + } + + + , + "idx_dir": { + "type": + "string", + "description": "Type: `string`. Path to the directory for storing the reference index files", + "help_text": "Type: `string`. Path to the directory for storing the reference index files." + + } + + + , + "readb": { + "type": + "string", + "description": "Type: `string`. Path to the directory for storing pre-processed reads", + "help_text": "Type: `string`. Path to the directory for storing pre-processed reads." + + } + + + , + "fastx": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output aligned reads into FASTA/FASTQ file", + "help_text": "Type: `boolean_true`, default: `false`. Output aligned reads into FASTA/FASTQ file" + , + "default":false + } + + + , + "sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output SAM alignment for aligned reads", + "help_text": "Type: `boolean_true`, default: `false`. Output SAM alignment for aligned reads." + , + "default":false + } + + + , + "sq": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add SQ tags to the SAM file", + "help_text": "Type: `boolean_true`, default: `false`. Add SQ tags to the SAM file" + , + "default":false + } + + + , + "blast": { + "type": + "string", + "description": "Type: `string`, choices: ``0`, `1`, `1 cigar`, `1 cigar qcov`, `1 cigar qcov qstrand``. Blast options:\n* \u00270\u0027 - pairwise\n* \u00271\u0027 - tabular(Blast - m 8 format)\n* \u00271 cigar\u0027 - tabular + column for CIGAR\n* \u00271 cigar qcov\u0027 - tabular + columns for CIGAR and query coverage\n* \u00271 cigar qcov qstrand\u0027 - tabular + columns for CIGAR, query coverage and strand\n", + "help_text": "Type: `string`, choices: ``0`, `1`, `1 cigar`, `1 cigar qcov`, `1 cigar qcov qstrand``. Blast options:\n* \u00270\u0027 - pairwise\n* \u00271\u0027 - tabular(Blast - m 8 format)\n* \u00271 cigar\u0027 - tabular + column for CIGAR\n* \u00271 cigar qcov\u0027 - tabular + columns for CIGAR and query coverage\n* \u00271 cigar qcov qstrand\u0027 - tabular + columns for CIGAR, query coverage and strand\n", + "enum": ["0", "1", "1 cigar", "1 cigar qcov", "1 cigar qcov qstrand"] + + + } + + + , + "num_alignments": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Report first INT alignments per read reaching E-value", + "help_text": "Type: `integer`, example: `0`. Report first INT alignments per read reaching E-value. If Int = 0, all alignments will be output. Default: \u00270\u0027\n" + + } + + + , + "min_lis": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. search all alignments having the first INT longest LIS", + "help_text": "Type: `integer`, example: `2`. search all alignments having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is\ncomputed using seeds\u0027 positions to expand hits into longer matches prior to Smith-Waterman alignment. Default: \u00272\u0027.\n" + + } + + + , + "print_all_reads": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. output null alignment strings for non-aligned reads to SAM and/or BLAST tabular files", + "help_text": "Type: `boolean_true`, default: `false`. output null alignment strings for non-aligned reads to SAM and/or BLAST tabular files." + , + "default":false + } + + + , + "paired_in": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. In the case where a pair of reads is aligned with a score above the threshold, the output of the reads is controlled\nby the following options:\n* --paired_in and --paired_out are both false: Only one read per pair is output to the aligned fasta file", + "help_text": "Type: `boolean_true`, default: `false`. In the case where a pair of reads is aligned with a score above the threshold, the output of the reads is controlled\nby the following options:\n* --paired_in and --paired_out are both false: Only one read per pair is output to the aligned fasta file.\n* --paired_in is true and --paired_out is false: Both reads of the pair are output to the aligned fasta file.\n* --paired_in is false and --paired_out is true: Both reads are output the the other fasta file (if it is specified).\n" + , + "default":false + } + + + , + "paired_out": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. See description of --paired_in", + "help_text": "Type: `boolean_true`, default: `false`. See description of --paired_in." + , + "default":false + } + + + , + "out2": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output paired reads into separate files", + "help_text": "Type: `boolean_true`, default: `false`. Output paired reads into separate files. Must be used with \u0027--fastx\u0027. If a single reads file is provided, this options\nimplies interleaved paired reads. When used with \u0027sout\u0027, four (4) output files for aligned reads will be generated:\n\u0027aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd, aligned-singleton-rev\u0027. If \u0027other\u0027 option is also used,\neight (8) output files will be generated.\n" + , + "default":false + } + + + , + "sout": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Separate paired and singleton aligned reads", + "help_text": "Type: `boolean_true`, default: `false`. Separate paired and singleton aligned reads. Must be used with \u0027--fastx\u0027. If a single reads file is provided,\nthis options implies interleaved paired reads. Cannot be used with \u0027--paired_in\u0027 or \u0027--paired_out\u0027.\n" + , + "default":false + } + + + , + "zip_out": { + "type": + "string", + "description": "Type: `string`, example: `-1`, choices: ``1`, `true`, `t`, `yes`, `y`, `0`, `false`, `f`, `no`, `n`, `-1``. Compress the output files", + "help_text": "Type: `string`, example: `-1`, choices: ``1`, `true`, `t`, `yes`, `y`, `0`, `false`, `f`, `no`, `n`, `-1``. Compress the output files. The possible values are: \n* \u00271/true/t/yes/y\u0027\n* \u00270/false/f/no/n\u0027\n*\u0027-1\u0027 (the same format as input - default)\nThe values are Not case sensitive.\n", + "enum": ["1", "true", "t", "yes", "y", "0", "false", "f", "no", "n", "-1"] + + + } + + + , + "match": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. Smith-Waterman score for a match (positive integer)", + "help_text": "Type: `integer`, example: `2`. Smith-Waterman score for a match (positive integer). Default: \u00272\u0027.\n" + + } + + + , + "mismatch": { + "type": + "integer", + "description": "Type: `integer`, example: `-3`. Smith-Waterman penalty for a mismatch (negative integer)", + "help_text": "Type: `integer`, example: `-3`. Smith-Waterman penalty for a mismatch (negative integer). Default: \u0027-3\u0027.\n" + + } + + + , + "gap_open": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. Smith-Waterman penalty for introducing a gap (positive integer)", + "help_text": "Type: `integer`, example: `5`. Smith-Waterman penalty for introducing a gap (positive integer). Default: \u00275\u0027.\n" + + } + + + , + "gap_ext": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. Smith-Waterman penalty for extending a gap (positive integer)", + "help_text": "Type: `integer`, example: `2`. Smith-Waterman penalty for extending a gap (positive integer). Default: \u00272\u0027.\n" + + } + + + , + "N": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. Smith-Waterman penalty for ambiguous letters (N\u0027s) scored as --mismatch", + "help_text": "Type: `integer`, example: `-1`. Smith-Waterman penalty for ambiguous letters (N\u0027s) scored as --mismatch. Default: \u0027-1\u0027.\n" + + } + + + , + "a": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Number of threads to use", + "help_text": "Type: `integer`, example: `1`. Number of threads to use. Default: \u00271\u0027.\n" + + } + + + , + "e": { + "type": + "number", + "description": "Type: `double`, example: `1.0`. E-value threshold", + "help_text": "Type: `double`, example: `1.0`. E-value threshold. Default: \u00271\u0027.\n" + + } + + + , + "F": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Search only the forward strand", + "help_text": "Type: `boolean_true`, default: `false`. Search only the forward strand." + , + "default":false + } + + + , + "R": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Search only the reverse-complementary strand", + "help_text": "Type: `boolean_true`, default: `false`. Search only the reverse-complementary strand." + , + "default":false + } + + + , + "num_alignment": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. Report first INT alignments per read reaching E-value (--num_alignments 0 signifies all alignments will be output)", + "help_text": "Type: `integer`, example: `-1`. Report first INT alignments per read reaching E-value (--num_alignments 0 signifies all alignments will be output).\nDefault: \u0027-1\u0027\n" + + } + + + , + "best": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Report INT best alignments per read reaching E-value by searching --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments will be searched) Default: \u00271\u0027", + "help_text": "Type: `integer`, example: `1`. Report INT best alignments per read reaching E-value by searching --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments will be searched) Default: \u00271\u0027.\n" + + } + + + , + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Verbose output", + "help_text": "Type: `boolean_true`, default: `false`. Verbose output." + , + "default":false + } + + +} +}, + + + "otu picking options" : { + "title": "OTU picking options", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "number", + "description": "Type: `double`, example: `0.97`. %id similarity threshold (the alignment must still pass the E-value threshold)", + "help_text": "Type: `double`, example: `0.97`. %id similarity threshold (the alignment must still pass the E-value threshold). Default: \u00270.97\u0027.\n" + + } + + + , + "coverage": { + "type": + "number", + "description": "Type: `double`, example: `0.97`. %query coverage threshold (the alignment must still pass the E-value threshold)", + "help_text": "Type: `double`, example: `0.97`. %query coverage threshold (the alignment must still pass the E-value threshold). Default: \u00270.97\u0027.\n" + + } + + + , + "de_novo": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. FASTA/FASTQ file for reads matching database \u003c %id off (set using --id) and \u003c %cov (set using --coverage)\n(alignment must still pass the E-value threshold)", + "help_text": "Type: `boolean_true`, default: `false`. FASTA/FASTQ file for reads matching database \u003c %id off (set using --id) and \u003c %cov (set using --coverage)\n(alignment must still pass the E-value threshold).\n" + , + "default":false + } + + + , + "otu_map": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output OTU map (input to QIIME\u0027s make_otu_table", + "help_text": "Type: `boolean_true`, default: `false`. Output OTU map (input to QIIME\u0027s make_otu_table.py).\n" + , + "default":false + } + + +} +}, + + + "advanced options" : { + "title": "Advanced options", + "type": "object", + "description": "No description", + "properties": { + + + "num_seed": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. Number of seeds matched before searching for candidate LIS", + "help_text": "Type: `integer`, example: `2`. Number of seeds matched before searching for candidate LIS. Default: \u00272\u0027.\n" + + } + + + , + "passes": { + "type": + "string", + "description": "Type: List of `integer`, multiple_sep: `\";\"`. Three intervals at which to place the seed on the read L,L/2,3 (L is the seed length set in ", + "help_text": "Type: List of `integer`, multiple_sep: `\";\"`. Three intervals at which to place the seed on the read L,L/2,3 (L is the seed length set in ./indexdb_rna).\n" + + } + + + , + "edge": { + "type": + "string", + "description": "Type: `string`, example: `4`. The number (or percentage if followed by %) of nucleotides to add to each edge of the alignment region on the\nreference sequence before performing Smith-Waterman alignment", + "help_text": "Type: `string`, example: `4`. The number (or percentage if followed by %) of nucleotides to add to each edge of the alignment region on the\nreference sequence before performing Smith-Waterman alignment. Default: \u00274\u0027.\n" + + } + + + , + "full_search": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Search for all 0-error and 1-error seed off matches in the index rather than stopping after finding a 0-error match\n(\u003c1% gain in sensitivity with up four-fold decrease in speed)", + "help_text": "Type: `boolean_true`, default: `false`. Search for all 0-error and 1-error seed off matches in the index rather than stopping after finding a 0-error match\n(\u003c1% gain in sensitivity with up four-fold decrease in speed).\n" + , + "default":false + } + + +} +}, + + + "indexing options" : { + "title": "Indexing Options", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "integer", + "description": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2``. Create index files for the reference database", + "help_text": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2``. Create index files for the reference database. By default when this option is not used, the program checks the\nreference index and builds it if not already existing.\nThis can be changed by using \u0027-index\u0027 as follows:\n* \u0027-index 0\u0027 - skip indexing. If the index does not exist, the program will terminate\n and warn to build the index prior performing the alignment\n* \u0027-index 1\u0027 - only perform the indexing and terminate\n* \u0027-index 2\u0027 - the default behaviour, the same as when not using this option at all\n", + "enum": [0, 1, 2] + + + } + + + , + "-L": { + "type": + "number", + "description": "Type: `double`, example: `18.0`. Indexing seed length", + "help_text": "Type: `double`, example: `18.0`. Indexing seed length. Default: \u002718\u0027\n" + + } + + + , + "interval": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Index every Nth L-mer in the reference database", + "help_text": "Type: `integer`, example: `1`. Index every Nth L-mer in the reference database. Default: \u00271\u0027\n" + + } + + + , + "max_pos": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. Maximum number of positions to store for each unique L-mer", + "help_text": "Type: `integer`, example: `1000`. Maximum number of positions to store for each unique L-mer. Set to 0 to store all positions. Default: \u00271000\u0027\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/otu picking options" + }, + + { + "$ref": "#/definitions/advanced options" + }, + + { + "$ref": "#/definitions/indexing options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/.config.vsh.yaml new file mode 100644 index 0000000..f872574 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/.config.vsh.yaml @@ -0,0 +1,2704 @@ +name: "star_align_reads" +namespace: "star" +version: "v0.3.1" +authors: +- name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" +- name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" +argument_groups: +- name: "Run Parameters" + arguments: + - type: "integer" + name: "--run_rng_seed" + description: "random number generator seed." + info: + orig_name: "--runRNGseed" + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Genome Parameters" + arguments: + - type: "file" + name: "--genome_dir" + description: "path to the directory where genome files are stored (for --runMode\ + \ alignReads) or will be generated (for --runMode generateGenome)" + info: + orig_name: "--genomeDir" + example: + - "./GenomeDir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--genome_load" + description: "mode of shared memory usage for the genome files. Only used with\ + \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ + \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ + \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ + \ and exit, keeping the genome in memory for future runs\n- Remove \ + \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ + \ ... do not use shared memory, each job will have its own private copy of\ + \ the genome" + info: + orig_name: "--genomeLoad" + example: + - "NoSharedMemory" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_fasta_files" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." + info: + orig_name: "--genomeFastaFiles" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--genome_file_sizes" + description: "genome files exact sizes in bytes. Typically, this should not be\ + \ defined by the user." + info: + orig_name: "--genomeFileSizes" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--genome_transform_output" + description: "which output to transform back to original genome\n\n- SAM ...\ + \ SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- Quant \ + \ ... quantifications (from --quant_mode option)\n- None ... no transformation\ + \ of the output" + info: + orig_name: "--genomeTransformOutput" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--genome_chr_set_mitochondrial" + description: "names of the mitochondrial chromosomes. Presently only used for\ + \ STARsolo statistics output/" + info: + orig_name: "--genomeChrSetMitochondrial" + example: + - "chrM" + - "M" + - "MT" + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdb_file_chr_start_end" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. Multiple files can be supplied\ + \ and will be concatenated." + info: + orig_name: "--sjdbFileChrStartEnd" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--sjdb_gtf_file" + description: "path to the GTF file with annotations" + info: + orig_name: "--sjdbGTFfile" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_chr_prefix" + description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSMEBL annotations with UCSC genomes)" + info: + orig_name: "--sjdbGTFchrPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_feature_exon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: + orig_name: "--sjdbGTFfeatureExon" + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_transcript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: + orig_name: "--sjdbGTFtagExonParentTranscript" + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_gene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: + orig_name: "--sjdbGTFtagExonParentGene" + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_gene_name" + description: "GTF attribute name for parent gene name" + info: + orig_name: "--sjdbGTFtagExonParentGeneName" + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_gene_type" + description: "GTF attribute name for parent gene type" + info: + orig_name: "--sjdbGTFtagExonParentGeneType" + example: + - "gene_type" + - "gene_biotype" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--sjdb_overhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: + orig_name: "--sjdbOverhang" + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sjdb_score" + description: "extra alignment score for alignments that cross database junctions" + info: + orig_name: "--sjdbScore" + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_insert_save" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: + orig_name: "--sjdbInsertSave" + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Variation parameters" + arguments: + - type: "string" + name: "--var_vcf_file" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: + orig_name: "--varVCFfile" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read Parameters" + arguments: + - type: "string" + name: "--read_files_type" + description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ + - SAM SE ... SAM or BAM single-end reads; for BAM use --read_files_command\ + \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ + \ --read_files_command samtools view" + info: + orig_name: "--readFilesType" + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--read_files_sam_attr_keep" + description: "for --read_files_type SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n-\ + \ None ... do not keep any tags" + info: + orig_name: "--readFilesSAMattrKeep" + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--read_files_manifest" + description: "path to the \"manifest\" file with the names of read files. The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads:\ + \ read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but\ + \ not tabs are allowed in file names.\nIf read_group_line does not start with\ + \ ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line\ + \ starts with ID:, it can contain several fields separated by $tab$, and all\ + \ fields will be be copied verbatim into SAM @RG header line." + info: + orig_name: "--readFilesManifest" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--read_files_prefix" + description: "prefix for the read files names, i.e. it will be added in front\ + \ of the strings in --readFilesIn" + info: + orig_name: "--readFilesPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--read_files_command" + description: "command line to execute for each of the input file. This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." + info: + orig_name: "--readFilesCommand" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--read_map_number" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: + orig_name: "--readMapNumber" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--read_mates_lengths_in" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both mates\ + \ are the same / not the same. NotEqual is safe in all situations." + info: + orig_name: "--readMatesLengthsIn" + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--read_name_separator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: + orig_name: "--readNameSeparator" + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--read_quality_score_base" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: + orig_name: "--readQualityScoreBase" + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read Clipping" + arguments: + - type: "string" + name: "--clip_adapter_type" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based on\ + \ Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ...\ + \ no adapter clipping, all other clip* parameters are disregarded" + info: + orig_name: "--clipAdapterType" + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--clip3p_nbases" + description: "number(s) of bases to clip from 3p of each mate. If one value is\ + \ given, it will be assumed the same for both mates." + info: + orig_name: "--clip3pNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--clip3p_adapter_seq" + description: "adapter sequences to clip from 3p of each mate. If one value is\ + \ given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence\ + \ with the length equal to read length" + info: + orig_name: "--clip3pAdapterSeq" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "double" + name: "--clip3p_adapter_mm_p" + description: "max proportion of mismatches for 3p adapter clipping for each mate.\ + \ If one value is given, it will be assumed the same for both mates." + info: + orig_name: "--clip3pAdapterMMp" + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--clip3p_after_adapter_nbases" + description: "number of bases to clip from 3p of each mate after the adapter clipping.\ + \ If one value is given, it will be assumed the same for both mates." + info: + orig_name: "--clip3pAfterAdapterNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--clip5p_nbases" + description: "number(s) of bases to clip from 5p of each mate. If one value is\ + \ given, it will be assumed the same for both mates." + info: + orig_name: "--clip5pNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Limits" + arguments: + - type: "long" + name: "--limit_genome_generate_ram" + description: "maximum available RAM (bytes) for genome generation" + info: + orig_name: "--limitGenomeGenerateRAM" + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "long" + name: "--limit_io_buffer_size" + description: "max available buffers size (bytes) for input/output, per thread" + info: + orig_name: "--limitIObufferSize" + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "long" + name: "--limit_out_sam_one_read_bytes" + description: "max size of the SAM record (bytes) for one read. Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: + orig_name: "--limitOutSAMoneReadBytes" + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limit_out_sj_one_read" + description: "max number of junctions for one read (including all multi-mappers)" + info: + orig_name: "--limitOutSJoneRead" + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limit_out_sj_collapsed" + description: "max number of collapsed junctions" + info: + orig_name: "--limitOutSJcollapsed" + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "long" + name: "--limit_bam_sort_ram" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will be\ + \ set to the genome index size. 0 value can only be used with --genome_load\ + \ NoSharedMemory option." + info: + orig_name: "--limitBAMsortRAM" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limit_sjdb_insert_nsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: + orig_name: "--limitSjdbInsertNsj" + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limit_nreads_soft" + description: "soft limit on the number of reads" + info: + orig_name: "--limitNreadsSoft" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output: general" + arguments: + - type: "string" + name: "--out_tmp_keep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... keep all files" + info: + orig_name: "--outTmpKeep" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_std" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --out_sam_type BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --out_sam_type\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. Requires --quant_mode TranscriptomeSAM" + info: + orig_name: "--outStd" + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_reads_unmapped" + description: "output of unmapped and partially mapped (i.e. mapped only one mate\ + \ of a paired end read) reads in separate file(s).\n\n- None ... no output\n\ + - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: + orig_name: "--outReadsUnmapped" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_qs_conversion_add" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: + orig_name: "--outQSconversionAdd" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_multimapper_order" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." + info: + orig_name: "--outMultimapperOrder" + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--out_sam_type" + description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ + \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ + 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ + \ ... sorted by coordinate. This option will allocate extra memory for sorting\ + \ which can be specified by --limit_bam_sort_ram." + info: + orig_name: "--outSAMtype" + example: + - "SAM" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--out_sam_mode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... full SAM but without quality scores" + info: + orig_name: "--outSAMmode" + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_sam_strand_field" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: + orig_name: "--outSAMstrandField" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_sam_attributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number\ + \ of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard\ + \ SAM tag.\n- HI ... multiple alignment index, starts with --out_sam_attr_ih_start\ + \ (=1 by default). Standard SAM tag.\n- AS ... local alignment score,\ + \ +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE\ + \ reads, total score for two mates. Stadnard SAM tag.\n- nM ... number\ + \ of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance\ + \ to the reference (number of mismatched + inserted + deleted bases) for each\ + \ mate. Standard SAM tag.\n- MD ... string encoding mismatched and\ + \ deleted reference bases (see standard SAM specifications). Standard SAM tag.\n\ + - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical;\ + \ 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions\ + \ database is used, and a junction is annotated, 20 is added to its motif value.\n\ + - jI ... start and end of introns for all junctions (1-based).\n- XS\ + \ ... alignment strand according to --out_sam_strand_field.\n- MC \ + \ ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks\ + \ all segment of all chimeric alingments for --chim_out_type WithinBAM output.\n\ + - cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n\ + - vA ... variant allele\n- vG ... genomic coordinate of the\ + \ variant overlapped by the read.\n- vW ... 1 - alignment passes WASP\ + \ filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires\ + \ --wasp_output_mode SAMtag.\n- ha ... haplotype (1/2) when mapping\ + \ to the diploid genome. Requires genome generated with --genomeTransformType\ + \ Diploid .\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ + \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene\ + \ ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene\ + \ names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ + \ cell barcodes and UMIs for solo* demultiplexing. Requires --out_sam_type BAM\ + \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ + \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ + \ ... quality of the entire barcode.\n- sF ... type of feature overlap\ + \ and number of features for each alignment\n***Unsupported/undocumented:\n\ + - rB ... alignment block read/genomic coordinates.\n- vR ...\ + \ read coordinate of the variant." + info: + orig_name: "--outSAMattributes" + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--out_sam_attr_ih_start" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." + info: + orig_name: "--outSAMattrIHstart" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_sam_unmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ... output unmapped reads within the main SAM file\ + \ (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for\ + \ each alignment, and, in case of unsorted output, keep it adjacent to its mapped\ + \ mate. Only affects multi-mapping reads." + info: + orig_name: "--outSAMunmapped" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--out_sam_order" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: + orig_name: "--outSAMorder" + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_sam_primary_flag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the\ + \ best score is primary\n- AllBestScore ... all alignments with the best score\ + \ are primary" + info: + orig_name: "--outSAMprimaryFlag" + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_sam_read_id" + description: "read ID record type\n\n- Standard ... first word (until space) from\ + \ the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number\ + \ (index) in the FASTx file" + info: + orig_name: "--outSAMreadID" + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_sam_mapq_unique" + description: "0 to 255: the MAPQ value for unique mappers" + info: + orig_name: "--outSAMmapqUnique" + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_sam_flag_or" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by\ + \ STAR, and after outSAMflagAND. Can be used to set specific bits that are not\ + \ set otherwise." + info: + orig_name: "--outSAMflagOR" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_sam_flag_and" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by\ + \ STAR, but before outSAMflagOR. Can be used to unset specific bits that are\ + \ not set otherwise." + info: + orig_name: "--outSAMflagAND" + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_sam_attr_rg_line" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --out_sam_attr_rg_line ID:xxx\ + \ CN:yy \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment.\ + \ Any spaces in the tag values have to be double quoted.\nComma separated RG\ + \ lines correspons to different (comma separated) input files in --readFilesIn.\ + \ Commas have to be surrounded by spaces, e.g.\n--out_sam_attr_rg_line ID:xxx\ + \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + info: + orig_name: "--outSAMattrRGline" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--out_sam_header_hd" + description: "@HD (header) line of the SAM header" + info: + orig_name: "--outSAMheaderHD" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--out_sam_header_pg" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: + orig_name: "--outSAMheaderPG" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--out_sam_header_comment_file" + description: "path to the file with @CO (comment) lines of the SAM header" + info: + orig_name: "--outSAMheaderCommentFile" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_sam_filter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genome_fasta_files at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genome_fasta_files\ + \ at the mapping stage." + info: + orig_name: "--outSAMfilter" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--out_sam_mult_nmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ... all alignments (up to --out_filter_multimap_nmax)\ + \ will be output" + info: + orig_name: "--outSAMmultNmax" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_sam_tlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n-\ + \ 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: + orig_name: "--outSAMtlen" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_bam_compression" + description: "-1 to 10 BAM compression level, -1=default compression (6?), 0=no\ + \ compression, 10=maximum compression" + info: + orig_name: "--outBAMcompression" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_bam_sorting_thread_n" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." + info: + orig_name: "--outBAMsortingThreadN" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_bam_sorting_bins_n" + description: ">0: number of genome bins for coordinate-sorting" + info: + orig_name: "--outBAMsortingBinsN" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "BAM processing" + arguments: + - type: "string" + name: "--bam_remove_duplicates_type" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- -\ + \ ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: + orig_name: "--bamRemoveDuplicatesType" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bam_remove_duplicates_mate2bases_n" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: + orig_name: "--bamRemoveDuplicatesMate2basesN" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output Wiggle" + arguments: + - type: "string" + name: "--out_wig_type" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --out_sam_type BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only\ + \ 2nd read" + info: + orig_name: "--outWigType" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--out_wig_strand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: + orig_name: "--outWigStrand" + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_wig_references_prefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: + orig_name: "--outWigReferencesPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_wig_norm" + description: "type of normalization for the signal\n\n- RPM ... reads per million\ + \ of mapped reads\n- None ... no normalization, \"raw\" counts" + info: + orig_name: "--outWigNorm" + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output Filtering" + arguments: + - type: "string" + name: "--out_filter_type" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: + orig_name: "--outFilterType" + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_filter_multimap_score_range" + description: "the score range below the maximum score for multimapping alignments" + info: + orig_name: "--outFilterMultimapScoreRange" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_filter_multimap_nmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than this\ + \ value.\n\nOtherwise no alignments will be output, and the read will be counted\ + \ as \"mapped to too many loci\" in the Log.final.out ." + info: + orig_name: "--outFilterMultimapNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_filter_mismatch_nmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: + orig_name: "--outFilterMismatchNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--out_filter_mismatch_nover_lmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." + info: + orig_name: "--outFilterMismatchNoverLmax" + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--out_filter_mismatch_nover_read_lmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." + info: + orig_name: "--outFilterMismatchNoverReadLmax" + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_filter_score_min" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: + orig_name: "--outFilterScoreMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--out_filter_score_min_over_lread" + description: "same as outFilterScoreMin, but normalized to read length (sum of\ + \ mates' lengths for paired-end reads)" + info: + orig_name: "--outFilterScoreMinOverLread" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_filter_match_nmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: + orig_name: "--outFilterMatchNmin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--out_filter_match_nmin_over_lread" + description: "sam as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: + orig_name: "--outFilterMatchNminOverLread" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_filter_intron_motifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter out\ + \ alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ... filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. The annotated non-canonical\ + \ junctions will be kept." + info: + orig_name: "--outFilterIntronMotifs" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--out_filter_intron_strands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: + orig_name: "--outFilterIntronStrands" + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--out_sj_type" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: + orig_name: "--outSJtype" + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--out_sj_filter_reads" + description: "which reads to consider for collapsed splice junctions output\n\n\ + - All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping\ + \ reads only" + info: + orig_name: "--outSJfilterReads" + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--out_sj_filter_overhang_min" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif,\ + \ (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply\ + \ to annotated junctions" + info: + orig_name: "--outSJfilterOverhangMin" + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--out_sj_filter_count_unique_min" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and\ + \ GT/AT motif. -1 means no output for that motif\n\nJunctions are output if\ + \ one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are\ + \ satisfied\ndoes not apply to annotated junctions" + info: + orig_name: "--outSJfilterCountUniqueMin" + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--out_sj_filter_count_total_min" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif,\ + \ (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions\ + \ are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: + orig_name: "--outSJfilterCountTotalMin" + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--out_sj_filter_dist_to_other_sj_min" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: + orig_name: "--outSJfilterDistToOtherSJmin" + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--out_sj_filter_intron_max_vs_read_n" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2\ + \ reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: + orig_name: "--outSJfilterIntronMaxVsReadN" + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Scoring" + arguments: + - type: "integer" + name: "--score_gap" + description: "splice junction penalty (independent on intron motif)" + info: + orig_name: "--scoreGap" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_gap_noncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: + orig_name: "--scoreGapNoncan" + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_gap_gcag" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: + orig_name: "--scoreGapGCAG" + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_gap_atac" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: + orig_name: "--scoreGapATAC" + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_genomic_length_log2scale" + description: "extra score logarithmically scaled with genomic length of the alignment:\ + \ scoreGenomicLengthLog2scale*log2(genomicLength)" + info: + orig_name: "--scoreGenomicLengthLog2scale" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_del_open" + description: "deletion open penalty" + info: + orig_name: "--scoreDelOpen" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_del_base" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: + orig_name: "--scoreDelBase" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_ins_open" + description: "insertion open penalty" + info: + orig_name: "--scoreInsOpen" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_ins_base" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: + orig_name: "--scoreInsBase" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--score_stitch_sj_shift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: + orig_name: "--scoreStitchSJshift" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seed_search_start_lmax" + description: "defines the search start point through the read - the read is split\ + \ into pieces no longer than this value" + info: + orig_name: "--seedSearchStartLmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--seed_search_start_lmax_over_lread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: + orig_name: "--seedSearchStartLmaxOverLread" + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed_search_lmax" + description: "defines the maximum length of the seeds, if =0 seed length is not\ + \ limited" + info: + orig_name: "--seedSearchLmax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed_multimap_nmax" + description: "only pieces that map fewer than this value are utilized in the stitching\ + \ procedure" + info: + orig_name: "--seedMultimapNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed_per_read_nmax" + description: "max number of seeds per read" + info: + orig_name: "--seedPerReadNmax" + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed_per_window_nmax" + description: "max number of seeds per window" + info: + orig_name: "--seedPerWindowNmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed_none_loci_per_window" + description: "max number of one seed loci per window" + info: + orig_name: "--seedNoneLociPerWindow" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed_split_min" + description: "min length of the seed sequences split by Ns or mate gap" + info: + orig_name: "--seedSplitMin" + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed_map_min" + description: "min length of seeds to be mapped" + info: + orig_name: "--seedMapMin" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_intron_min" + description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: + orig_name: "--alignIntronMin" + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_intron_max" + description: "maximum intron size, if 0, max intron size will be determined by\ + \ (2^winBinNbits)*winAnchorDistNbins" + info: + orig_name: "--alignIntronMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_mates_gap_max" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: + orig_name: "--alignMatesGapMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_sj_overhang_min" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: + orig_name: "--alignSJoverhangMin" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_sj_stitch_mismatch_nmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3)\ + \ GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: + orig_name: "--alignSJstitchMismatchNmax" + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--align_sjdb_overhang_min" + description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ + \ alignments" + info: + orig_name: "--alignSJDBoverhangMin" + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_spliced_mate_map_lmin" + description: "minimum mapped length for a read mate that is spliced" + info: + orig_name: "--alignSplicedMateMapLmin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--align_spliced_mate_map_lmin_over_lmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: + orig_name: "--alignSplicedMateMapLminOverLmate" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_windows_per_read_nmax" + description: "max number of windows per read" + info: + orig_name: "--alignWindowsPerReadNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_transcripts_per_window_nmax" + description: "max number of transcripts per window" + info: + orig_name: "--alignTranscriptsPerWindowNmax" + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--align_transcripts_per_read_nmax" + description: "max number of different alignments per read to consider" + info: + orig_name: "--alignTranscriptsPerReadNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--align_ends_type" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: + orig_name: "--alignEndsType" + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--align_ends_protrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum\ + \ number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion as concordant\ + \ pairs\n- DiscordantPair ... report alignments with non-zero\ + \ protrusion as discordant pairs" + info: + orig_name: "--alignEndsProtrude" + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--align_soft_clip_at_reference_ends" + description: "allow the soft-clipping of the alignments past the end of the chromosomes\n\ + \n- Yes ... allow\n- No ... prohibit, useful for compatibility with Cufflinks" + info: + orig_name: "--alignSoftClipAtReferenceEnds" + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--align_insertion_flush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... insertions are flushed to the right" + info: + orig_name: "--alignInsertionFlush" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Paired-End reads" + arguments: + - type: "integer" + name: "--pe_overlap_nbases_min" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. Specify >0 value to switch on the \"merginf of overlapping mates\"\ + \ algorithm." + info: + orig_name: "--peOverlapNbasesMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--pe_overlap_mm_p" + description: "maximum proportion of mismatched bases in the overlap area" + info: + orig_name: "--peOverlapMMp" + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--win_anchor_multimap_nmax" + description: "max number of loci anchors are allowed to map to" + info: + orig_name: "--winAnchorMultimapNmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--win_bin_nbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins." + info: + orig_name: "--winBinNbits" + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--win_anchor_dist_nbins" + description: "max number of bins between two anchors that allows aggregation of\ + \ anchors into one window" + info: + orig_name: "--winAnchorDistNbins" + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--win_flank_nbins" + description: "log2(winFlank), where win Flank is the size of the left and right\ + \ flanking regions for each window" + info: + orig_name: "--winFlankNbins" + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--win_read_coverage_relative_min" + description: "minimum relative coverage of the read sequence by the seeds in a\ + \ window, for STARlong algorithm only." + info: + orig_name: "--winReadCoverageRelativeMin" + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--win_read_coverage_bases_min" + description: "minimum number of bases covered by the seeds in a window , for STARlong\ + \ algorithm only." + info: + orig_name: "--winReadCoverageBasesMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chim_out_type" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n-\ + \ WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n-\ + \ WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... soft-clipping in the CIGAR for supplemental chimeric alignments" + info: + orig_name: "--chimOutType" + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--chim_segment_min" + description: "minimum length of chimeric segment length, if ==0, no chimeric output" + info: + orig_name: "--chimSegmentMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_score_min" + description: "minimum total (summed) score of the chimeric segments" + info: + orig_name: "--chimScoreMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_score_drop_max" + description: "max drop (difference) of chimeric score (the sum of scores of all\ + \ chimeric segments) from the read length" + info: + orig_name: "--chimScoreDropMax" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_score_separation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: + orig_name: "--chimScoreSeparation" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_score_junction_non_gtag" + description: "penalty for a non-GT/AG chimeric junction" + info: + orig_name: "--chimScoreJunctionNonGTAG" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_junction_overhang_min" + description: "minimum overhang for a chimeric junction" + info: + orig_name: "--chimJunctionOverhangMin" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_segment_read_gap_max" + description: "maximum gap in the read sequence between chimeric segments" + info: + orig_name: "--chimSegmentReadGapMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--chim_filter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: + orig_name: "--chimFilter" + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--chim_main_segment_mult_nmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: + orig_name: "--chimMainSegmentMultNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_multimap_nmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the old\ + \ scheme for chimeric detection which only considered unique alignments" + info: + orig_name: "--chimMultimapNmax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_multimap_score_range" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. Only works with --chim_multimap_nmax > 1" + info: + orig_name: "--chimMultimapScoreRange" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_nonchim_score_drop_min" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: + orig_name: "--chimNonchimScoreDropMin" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chim_out_junction_format" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ... no\ + \ comment lines/headers\n- 1 ... comment lines at the end of the file: command\ + \ line and Nreads: total, unique/multi-mapping" + info: + orig_name: "--chimOutJunctionFormat" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quant_mode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: + orig_name: "--quantMode" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--quant_transcriptome_bam_compression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM\ + \ output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10\ + \ ... maximum compression" + info: + orig_name: "--quantTranscriptomeBAMcompression" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quant_transcriptome_sam_output" + description: "alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip\ + \ ... prohibit indels and single-end alignments, extend softclips - compatible\ + \ with RSEM\n- BanSingleEnd ... prohibit single-end alignments,\ + \ allow indels and softclips\n- BanSingleEnd_ExtendSoftclip ... prohibit single-end\ + \ alignments, extend softclips, allow indels" + info: + orig_name: "--quantTranscriptomeSAMoutput" + example: + - "BanSingleEnd_BanIndels_ExtendSoftclip" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopass_mode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: + orig_name: "--twopassMode" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--twopass1reads_n" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." + info: + orig_name: "--twopass1readsN" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "WASP parameters" + arguments: + - type: "string" + name: "--wasp_output_mode" + description: "WASP allele-specific output type. This is re-implementation of the\ + \ original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature\ + \ Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\ + \n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" + info: + orig_name: "--waspOutputMode" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "STARsolo (single cell RNA-seq) parameters" + arguments: + - type: "string" + name: "--solo_type" + description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ + \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X\ + \ Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ + \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ + \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ + \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ + \ if paired-end] CellBarcode_read . Requires --out_sam_type BAM Unsorted [and/or\ + \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ + \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ + \ UMI sequences, alignments deduplicated according to alignment start and end\ + \ (after extending soft-clipped bases)" + info: + orig_name: "--soloType" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_cb_type" + description: "cell barcode type\n\nSequence: cell barcode is a sequence (standard\ + \ option)\nString: cell barcode is an arbitrary string" + info: + orig_name: "--soloCBtype" + example: + - "Sequence" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--solo_cb_whitelist" + description: "file(s) with whitelist(s) of cell barcodes. Only --solo_type CB_UMI_Complex\ + \ allows more than one whitelist file.\n\n- None ... no whitelist:\ + \ all cell barcodes are allowed" + info: + orig_name: "--soloCBwhitelist" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--solo_cb_start" + description: "cell barcode start base" + info: + orig_name: "--soloCBstart" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--solo_cb_len" + description: "cell barcode length" + info: + orig_name: "--soloCBlen" + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--solo_umi_start" + description: "UMI start base" + info: + orig_name: "--soloUMIstart" + example: + - 17 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--solo_umi_len" + description: "UMI length" + info: + orig_name: "--soloUMIlen" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--solo_barcode_read_length" + description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ + - 0 ... not defined, do not check" + info: + orig_name: "--soloBarcodeReadLength" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--solo_barcode_mate" + description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ + \n- 0 ... barcode sequence is on separate read, which should always be the\ + \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ + \ of mate 1\n- 2 ... barcode sequence is a part of mate 2" + info: + orig_name: "--soloBarcodeMate" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--solo_cb_position" + description: "position of Cell Barcode(s) on the barcode read.\n\nPresently only\ + \ works with --solo_type CB_UMI_Complex, and barcodes are assumed to be on Read2.\n\ + Format for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor\ + \ defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter\ + \ start; 3: adapter end\nstart(end)Position is the 0-based position with of\ + \ the CB start(end) with respect to the Anchor Base\nString for different barcodes\ + \ are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols,\ + \ 2017):\n--solo_cb_position 0_0_2_-1 3_1_3_8" + info: + orig_name: "--soloCBposition" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_umi_position" + description: "position of the UMI on the barcode read, same as soloCBposition\n\ + \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position\ + \ 3_9_3_14" + info: + orig_name: "--soloUMIposition" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--solo_adapter_sequence" + description: "adapter sequence to anchor barcodes. Only one adapter sequence is\ + \ allowed." + info: + orig_name: "--soloAdapterSequence" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--solo_adapter_mismatches_nmax" + description: "maximum number of mismatches allowed in adapter sequence." + info: + orig_name: "--soloAdapterMismatchesNmax" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--solo_cb_match_wl_type" + description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ + \ ... only exact matches allowed\n- 1MM \ + \ ... only one match in whitelist with 1 mismatched base allowed. Allowed\ + \ CBs have to have at least one read with exact match.\n- 1MM_multi \ + \ ... multiple matches in whitelist with 1 mismatched base allowed,\ + \ posterior probability calculation is used choose one of the matches.\nAllowed\ + \ CBs have to have at least one read with exact match. This option matches best\ + \ with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi,\ + \ but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts\ + \ ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for\ + \ CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2\ + \ ... allow up to edit distance of 3 fpr each of the barcodes.\ + \ May include one deletion + one insertion. Only works with --solo_type CB_UMI_Complex.\ + \ Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio\ + \ Split-seq pipeline." + info: + orig_name: "--soloCBmatchWLtype" + example: + - "1MM_multi" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--solo_input_sam_attr_barcode_seq" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance,\ + \ for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_seq\ + \ CR UR .\nThis parameter is required when running STARsolo with input from\ + \ SAM." + info: + orig_name: "--soloInputSAMattrBarcodeSeq" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_input_sam_attr_barcode_qual" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_qual\ + \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ + \ to all bases." + info: + orig_name: "--soloInputSAMattrBarcodeQual" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_strand" + description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ + \ information\n- Forward ... read strand same as the original RNA molecule\n\ + - Reverse ... read strand opposite to the original RNA molecule" + info: + orig_name: "--soloStrand" + example: + - "Forward" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--solo_features" + description: "genomic features for which the UMI counts per Cell Barcode are collected\n\ + \n- Gene ... genes: reads match the gene transcript\n- SJ \ + \ ... splice junctions: reported in SJ.out.tab\n- GeneFull ...\ + \ full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n\ + - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping\ + \ genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS\ + \ ... full gene (pre-RNA): count all reads overlapping genes' exons and\ + \ introns: prioritize >50% overlap with exons. Do not count reads with 100%\ + \ exonic overlap in the antisense direction." + info: + orig_name: "--soloFeatures" + example: + - "Gene" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_multi_mappers" + description: "counting method for reads mapping to multiple genes\n\n- Unique\ + \ ... count only reads that map to unique genes\n- Uniform ... uniformly\ + \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ + \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ + \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ + \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ + \ Maximization algorithm" + info: + orig_name: "--soloMultiMappers" + example: + - "Unique" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_umi_dedup" + description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All \ + \ ... all UMIs with 1 mismatch distance to each other are\ + \ collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ + \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ + \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ + \ but with more stringent criteria for duplicate UMIs\n- Exact \ + \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ + \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR \ + \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." + info: + orig_name: "--soloUMIdedup" + example: + - "1MM_All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_umi_filtering" + description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ + - - ... basic filtering: remove UMIs with N and homopolymers\ + \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count\ + \ UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove\ + \ all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic +\ + \ remove lower-count UMIs that map to more than one gene, matching CellRanger\ + \ > 3.0.0 .\nOnly works with --solo_umi_dedup 1MM_CR" + info: + orig_name: "--soloUMIfiltering" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_out_file_names" + description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ + \ barcode_sequences cell_feature_count_matrix" + info: + orig_name: "--soloOutFileNames" + example: + - "Solo.out/" + - "features.tsv" + - "barcodes.tsv" + - "matrix.mtx" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_cell_filter" + description: "cell filtering type and parameters\n\n- None ... do not\ + \ output filtered cells\n- TopCells ... only report top cells by UMI\ + \ count, followed by the exact number of cells\n- CellRanger2.2 ... simple\ + \ filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected\ + \ cells, robust maximum percentile for UMI count, maximum to minimum ratio for\ + \ UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; \ + \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ + \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ + \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile \ + \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR\ + \ simN\nThe harcoded values are from CellRanger: 3000 \ + \ 0.99 10 45000 90000 500 0.01 20000\ + \ 0.01 10000" + info: + orig_name: "--soloCellFilter" + example: + - "CellRanger2.2" + - "3000" + - "0.99" + - "10" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_out_format_features_gene_field3" + description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ + \ is output." + info: + orig_name: "--soloOutFormatFeaturesGeneField3" + example: + - "Gene Expression" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--solo_cell_read_stats" + description: "Output reads statistics for each CB\n\n- Standard ... standard\ + \ output" + info: + orig_name: "--soloCellReadStats" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--readFilesIn" + description: "The single-end or paired-end R1 FastQ files to be processed." + info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--input_r2" + description: "The paired-end R2 FastQ files to be processed. Only required if\ + \ --input is a paired-end R1 file." + info: null + example: + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--aligned_reads" + description: "The output file containing the aligned reads." + info: null + example: + - "aligned_reads.bam" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reads_per_gene" + description: "The output file containing the number of reads per gene." + info: null + example: + - "reads_per_gene.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unmapped" + description: "The output file containing the unmapped reads." + info: null + example: + - "unmapped.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unmapped_r2" + description: "The output file containing the unmapped R2 reads." + info: null + example: + - "unmapped_r2.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chimeric_junctions" + description: "The output file containing the chimeric junctions." + info: null + example: + - "chimeric_junctions.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--log" + description: "The output file containing the log of the alignment process." + info: null + example: + - "log.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--splice_junctions" + description: "The output file containing the splice junctions." + info: null + example: + - "splice_junctions.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reads_aligned_to_transcriptome" + description: "The output file containing the alignments to transcriptome in BAM\ + \ formats. This file is generated when --quantMode is set to TranscriptomeSAM." + info: null + example: + - "transcriptome_aligned.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +description: "Aligns reads to a reference genome using STAR.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "align" +- "fasta" +- "genome" +license: "MIT" +references: + doi: + - "10.1093/bioinformatics/bts635" +links: + repository: "https://github.com/alexdobin/STAR" + documentation: "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.12-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + - "gzip" + - "bzip2" + interactive: false + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + env: + - "STAR_VERSION 2.7.11b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd" + - type: "python" + user: false + packages: + - "pyyaml" + upgrade: true + - type: "docker" + run: + - "STAR --version | sed 's#\\(.*\\)#star: \"\\1\"#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/star/star_align_reads/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/star/star_align_reads" + executable: "target/nextflow/star/star_align_reads/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/main.nf new file mode 100644 index 0000000..35f785f --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/main.nf @@ -0,0 +1,7046 @@ +// star_align_reads v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "star_align_reads", + "namespace" : "star", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Run Parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--run_rng_seed", + "description" : "random number generator seed.", + "info" : { + "orig_name" : "--runRNGseed" + }, + "example" : [ + 777 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Genome Parameters", + "arguments" : [ + { + "type" : "file", + "name" : "--genome_dir", + "description" : "path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode generateGenome)", + "info" : { + "orig_name" : "--genomeDir" + }, + "example" : [ + "./GenomeDir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--genome_load", + "description" : "mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome", + "info" : { + "orig_name" : "--genomeLoad" + }, + "example" : [ + "NoSharedMemory" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_fasta_files", + "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", + "info" : { + "orig_name" : "--genomeFastaFiles" + }, + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--genome_file_sizes", + "description" : "genome files exact sizes in bytes. Typically, this should not be defined by the user.", + "info" : { + "orig_name" : "--genomeFileSizes" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--genome_transform_output", + "description" : "which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- Quant ... quantifications (from --quant_mode option)\n- None ... no transformation of the output", + "info" : { + "orig_name" : "--genomeTransformOutput" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--genome_chr_set_mitochondrial", + "description" : "names of the mitochondrial chromosomes. Presently only used for STARsolo statistics output/", + "info" : { + "orig_name" : "--genomeChrSetMitochondrial" + }, + "example" : [ + "chrM", + "M", + "MT" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Splice Junctions Database", + "arguments" : [ + { + "type" : "string", + "name" : "--sjdb_file_chr_start_end", + "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", + "info" : { + "orig_name" : "--sjdbFileChrStartEnd" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sjdb_gtf_file", + "description" : "path to the GTF file with annotations", + "info" : { + "orig_name" : "--sjdbGTFfile" + }, + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_chr_prefix", + "description" : "prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL annotations with UCSC genomes)", + "info" : { + "orig_name" : "--sjdbGTFchrPrefix" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_feature_exon", + "description" : "feature type in GTF file to be used as exons for building transcripts", + "info" : { + "orig_name" : "--sjdbGTFfeatureExon" + }, + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_transcript", + "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", + "info" : { + "orig_name" : "--sjdbGTFtagExonParentTranscript" + }, + "example" : [ + "transcript_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_gene", + "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", + "info" : { + "orig_name" : "--sjdbGTFtagExonParentGene" + }, + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_gene_name", + "description" : "GTF attribute name for parent gene name", + "info" : { + "orig_name" : "--sjdbGTFtagExonParentGeneName" + }, + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_gene_type", + "description" : "GTF attribute name for parent gene type", + "info" : { + "orig_name" : "--sjdbGTFtagExonParentGeneType" + }, + "example" : [ + "gene_type", + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sjdb_overhang", + "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "info" : { + "orig_name" : "--sjdbOverhang" + }, + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sjdb_score", + "description" : "extra alignment score for alignments that cross database junctions", + "info" : { + "orig_name" : "--sjdbScore" + }, + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_insert_save", + "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", + "info" : { + "orig_name" : "--sjdbInsertSave" + }, + "example" : [ + "Basic" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Variation parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--var_vcf_file", + "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1", + "info" : { + "orig_name" : "--varVCFfile" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--read_files_type", + "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --read_files_command samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --read_files_command samtools view", + "info" : { + "orig_name" : "--readFilesType" + }, + "example" : [ + "Fastx" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--read_files_sam_attr_keep", + "description" : "for --read_files_type SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... do not keep any tags", + "info" : { + "orig_name" : "--readFilesSAMattrKeep" + }, + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_files_manifest", + "description" : "path to the \\"manifest\\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line.", + "info" : { + "orig_name" : "--readFilesManifest" + }, + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--read_files_prefix", + "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", + "info" : { + "orig_name" : "--readFilesPrefix" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--read_files_command", + "description" : "command line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", + "info" : { + "orig_name" : "--readFilesCommand" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_map_number", + "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", + "info" : { + "orig_name" : "--readMapNumber" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--read_mates_lengths_in", + "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", + "info" : { + "orig_name" : "--readMatesLengthsIn" + }, + "example" : [ + "NotEqual" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--read_name_separator", + "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", + "info" : { + "orig_name" : "--readNameSeparator" + }, + "example" : [ + "/" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_quality_score_base", + "description" : "number to be subtracted from the ASCII code to get Phred quality score", + "info" : { + "orig_name" : "--readQualityScoreBase" + }, + "example" : [ + 33 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read Clipping", + "arguments" : [ + { + "type" : "string", + "name" : "--clip_adapter_type", + "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", + "info" : { + "orig_name" : "--clipAdapterType" + }, + "example" : [ + "Hamming" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--clip3p_nbases", + "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", + "info" : { + "orig_name" : "--clip3pNbases" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--clip3p_adapter_seq", + "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", + "info" : { + "orig_name" : "--clip3pAdapterSeq" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--clip3p_adapter_mm_p", + "description" : "max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates.", + "info" : { + "orig_name" : "--clip3pAdapterMMp" + }, + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--clip3p_after_adapter_nbases", + "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", + "info" : { + "orig_name" : "--clip3pAfterAdapterNbases" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--clip5p_nbases", + "description" : "number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates.", + "info" : { + "orig_name" : "--clip5pNbases" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Limits", + "arguments" : [ + { + "type" : "long", + "name" : "--limit_genome_generate_ram", + "description" : "maximum available RAM (bytes) for genome generation", + "info" : { + "orig_name" : "--limitGenomeGenerateRAM" + }, + "example" : [ + 31000000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--limit_io_buffer_size", + "description" : "max available buffers size (bytes) for input/output, per thread", + "info" : { + "orig_name" : "--limitIObufferSize" + }, + "example" : [ + 30000000, + 50000000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--limit_out_sam_one_read_bytes", + "description" : "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", + "info" : { + "orig_name" : "--limitOutSAMoneReadBytes" + }, + "example" : [ + 100000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limit_out_sj_one_read", + "description" : "max number of junctions for one read (including all multi-mappers)", + "info" : { + "orig_name" : "--limitOutSJoneRead" + }, + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limit_out_sj_collapsed", + "description" : "max number of collapsed junctions", + "info" : { + "orig_name" : "--limitOutSJcollapsed" + }, + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--limit_bam_sort_ram", + "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 0 value can only be used with --genome_load NoSharedMemory option.", + "info" : { + "orig_name" : "--limitBAMsortRAM" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limit_sjdb_insert_nsj", + "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", + "info" : { + "orig_name" : "--limitSjdbInsertNsj" + }, + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limit_nreads_soft", + "description" : "soft limit on the number of reads", + "info" : { + "orig_name" : "--limitNreadsSoft" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output: general", + "arguments" : [ + { + "type" : "string", + "name" : "--out_tmp_keep", + "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", + "info" : { + "orig_name" : "--outTmpKeep" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_std", + "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --out_sam_type BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --out_sam_type BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quant_mode TranscriptomeSAM", + "info" : { + "orig_name" : "--outStd" + }, + "example" : [ + "Log" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_reads_unmapped", + "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", + "info" : { + "orig_name" : "--outReadsUnmapped" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_qs_conversion_add", + "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", + "info" : { + "orig_name" : "--outQSconversionAdd" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_multimapper_order", + "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases.", + "info" : { + "orig_name" : "--outMultimapperOrder" + }, + "example" : [ + "Old_2.4" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output: SAM and BAM", + "arguments" : [ + { + "type" : "string", + "name" : "--out_sam_type", + "description" : "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limit_bam_sort_ram.", + "info" : { + "orig_name" : "--outSAMtype" + }, + "example" : [ + "SAM" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_mode", + "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", + "info" : { + "orig_name" : "--outSAMmode" + }, + "example" : [ + "Full" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_strand_field", + "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", + "info" : { + "orig_name" : "--outSAMstrandField" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_attributes", + "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --out_sam_attr_ih_start (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --out_sam_strand_field.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chim_out_type WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --wasp_output_mode SAMtag.\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --out_sam_type BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n- sF ... type of feature overlap and number of features for each alignment\n***Unsupported/undocumented:\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant.", + "info" : { + "orig_name" : "--outSAMattributes" + }, + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sam_attr_ih_start", + "description" : "start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie.", + "info" : { + "orig_name" : "--outSAMattrIHstart" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_unmapped", + "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", + "info" : { + "orig_name" : "--outSAMunmapped" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_order", + "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", + "info" : { + "orig_name" : "--outSAMorder" + }, + "example" : [ + "Paired" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_primary_flag", + "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", + "info" : { + "orig_name" : "--outSAMprimaryFlag" + }, + "example" : [ + "OneBestScore" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_read_id", + "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number (index) in the FASTx file", + "info" : { + "orig_name" : "--outSAMreadID" + }, + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sam_mapq_unique", + "description" : "0 to 255: the MAPQ value for unique mappers", + "info" : { + "orig_name" : "--outSAMmapqUnique" + }, + "example" : [ + 255 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sam_flag_or", + "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", + "info" : { + "orig_name" : "--outSAMflagOR" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sam_flag_and", + "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.", + "info" : { + "orig_name" : "--outSAMflagAND" + }, + "example" : [ + 65535 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_attr_rg_line", + "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --out_sam_attr_rg_line ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g.\n--out_sam_attr_rg_line ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", + "info" : { + "orig_name" : "--outSAMattrRGline" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_header_hd", + "description" : "@HD (header) line of the SAM header", + "info" : { + "orig_name" : "--outSAMheaderHD" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_header_pg", + "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", + "info" : { + "orig_name" : "--outSAMheaderPG" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_header_comment_file", + "description" : "path to the file with @CO (comment) lines of the SAM header", + "info" : { + "orig_name" : "--outSAMheaderCommentFile" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_sam_filter", + "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genome_fasta_files at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genome_fasta_files at the mapping stage.", + "info" : { + "orig_name" : "--outSAMfilter" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sam_mult_nmax", + "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... all alignments (up to --out_filter_multimap_nmax) will be output", + "info" : { + "orig_name" : "--outSAMmultNmax" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sam_tlen", + "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends", + "info" : { + "orig_name" : "--outSAMtlen" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_bam_compression", + "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", + "info" : { + "orig_name" : "--outBAMcompression" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_bam_sorting_thread_n", + "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", + "info" : { + "orig_name" : "--outBAMsortingThreadN" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_bam_sorting_bins_n", + "description" : ">0: number of genome bins for coordinate-sorting", + "info" : { + "orig_name" : "--outBAMsortingBinsN" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "BAM processing", + "arguments" : [ + { + "type" : "string", + "name" : "--bam_remove_duplicates_type", + "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers.", + "info" : { + "orig_name" : "--bamRemoveDuplicatesType" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bam_remove_duplicates_mate2bases_n", + "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", + "info" : { + "orig_name" : "--bamRemoveDuplicatesMate2basesN" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output Wiggle", + "arguments" : [ + { + "type" : "string", + "name" : "--out_wig_type", + "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --out_sam_type BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", + "info" : { + "orig_name" : "--outWigType" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_wig_strand", + "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", + "info" : { + "orig_name" : "--outWigStrand" + }, + "example" : [ + "Stranded" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_wig_references_prefix", + "description" : "prefix matching reference names to include in the output wiggle file, e.g. \\"chr\\", default \\"-\\" - include all references", + "info" : { + "orig_name" : "--outWigReferencesPrefix" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_wig_norm", + "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", + "info" : { + "orig_name" : "--outWigNorm" + }, + "example" : [ + "RPM" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output Filtering", + "arguments" : [ + { + "type" : "string", + "name" : "--out_filter_type", + "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", + "info" : { + "orig_name" : "--outFilterType" + }, + "example" : [ + "Normal" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_filter_multimap_score_range", + "description" : "the score range below the maximum score for multimapping alignments", + "info" : { + "orig_name" : "--outFilterMultimapScoreRange" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_filter_multimap_nmax", + "description" : "maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", + "info" : { + "orig_name" : "--outFilterMultimapNmax" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_filter_mismatch_nmax", + "description" : "alignment will be output only if it has no more mismatches than this value.", + "info" : { + "orig_name" : "--outFilterMismatchNmax" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--out_filter_mismatch_nover_lmax", + "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", + "info" : { + "orig_name" : "--outFilterMismatchNoverLmax" + }, + "example" : [ + 0.3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--out_filter_mismatch_nover_read_lmax", + "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", + "info" : { + "orig_name" : "--outFilterMismatchNoverReadLmax" + }, + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_filter_score_min", + "description" : "alignment will be output only if its score is higher than or equal to this value.", + "info" : { + "orig_name" : "--outFilterScoreMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--out_filter_score_min_over_lread", + "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", + "info" : { + "orig_name" : "--outFilterScoreMinOverLread" + }, + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_filter_match_nmin", + "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", + "info" : { + "orig_name" : "--outFilterMatchNmin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--out_filter_match_nmin_over_lread", + "description" : "sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", + "info" : { + "orig_name" : "--outFilterMatchNminOverLread" + }, + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_filter_intron_motifs", + "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.", + "info" : { + "orig_name" : "--outFilterIntronMotifs" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--out_filter_intron_strands", + "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", + "info" : { + "orig_name" : "--outFilterIntronStrands" + }, + "example" : [ + "RemoveInconsistentStrands" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output splice junctions (SJ.out.tab)", + "arguments" : [ + { + "type" : "string", + "name" : "--out_sj_type", + "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... no splice junction output", + "info" : { + "orig_name" : "--outSJtype" + }, + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output Filtering: Splice Junctions", + "arguments" : [ + { + "type" : "string", + "name" : "--out_sj_filter_reads", + "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", + "info" : { + "orig_name" : "--outSJfilterReads" + }, + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sj_filter_overhang_min", + "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions", + "info" : { + "orig_name" : "--outSJfilterOverhangMin" + }, + "example" : [ + 30, + 12, + 12, + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sj_filter_count_unique_min", + "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "info" : { + "orig_name" : "--outSJfilterCountUniqueMin" + }, + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sj_filter_count_total_min", + "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "info" : { + "orig_name" : "--outSJfilterCountTotalMin" + }, + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sj_filter_dist_to_other_sj_min", + "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", + "info" : { + "orig_name" : "--outSJfilterDistToOtherSJmin" + }, + "example" : [ + 10, + 0, + 5, + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--out_sj_filter_intron_max_vs_read_n", + "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", + "info" : { + "orig_name" : "--outSJfilterIntronMaxVsReadN" + }, + "example" : [ + 50000, + 100000, + 200000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Scoring", + "arguments" : [ + { + "type" : "integer", + "name" : "--score_gap", + "description" : "splice junction penalty (independent on intron motif)", + "info" : { + "orig_name" : "--scoreGap" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_gap_noncan", + "description" : "non-canonical junction penalty (in addition to scoreGap)", + "info" : { + "orig_name" : "--scoreGapNoncan" + }, + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_gap_gcag", + "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", + "info" : { + "orig_name" : "--scoreGapGCAG" + }, + "example" : [ + -4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_gap_atac", + "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", + "info" : { + "orig_name" : "--scoreGapATAC" + }, + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_genomic_length_log2scale", + "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", + "info" : { + "orig_name" : "--scoreGenomicLengthLog2scale" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_del_open", + "description" : "deletion open penalty", + "info" : { + "orig_name" : "--scoreDelOpen" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_del_base", + "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", + "info" : { + "orig_name" : "--scoreDelBase" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_ins_open", + "description" : "insertion open penalty", + "info" : { + "orig_name" : "--scoreInsOpen" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_ins_base", + "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", + "info" : { + "orig_name" : "--scoreInsBase" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--score_stitch_sj_shift", + "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", + "info" : { + "orig_name" : "--scoreStitchSJshift" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Alignments and Seeding", + "arguments" : [ + { + "type" : "integer", + "name" : "--seed_search_start_lmax", + "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", + "info" : { + "orig_name" : "--seedSearchStartLmax" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--seed_search_start_lmax_over_lread", + "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", + "info" : { + "orig_name" : "--seedSearchStartLmaxOverLread" + }, + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed_search_lmax", + "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", + "info" : { + "orig_name" : "--seedSearchLmax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed_multimap_nmax", + "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", + "info" : { + "orig_name" : "--seedMultimapNmax" + }, + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed_per_read_nmax", + "description" : "max number of seeds per read", + "info" : { + "orig_name" : "--seedPerReadNmax" + }, + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed_per_window_nmax", + "description" : "max number of seeds per window", + "info" : { + "orig_name" : "--seedPerWindowNmax" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed_none_loci_per_window", + "description" : "max number of one seed loci per window", + "info" : { + "orig_name" : "--seedNoneLociPerWindow" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed_split_min", + "description" : "min length of the seed sequ''' + '''ences split by Ns or mate gap", + "info" : { + "orig_name" : "--seedSplitMin" + }, + "example" : [ + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed_map_min", + "description" : "min length of seeds to be mapped", + "info" : { + "orig_name" : "--seedMapMin" + }, + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_intron_min", + "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", + "info" : { + "orig_name" : "--alignIntronMin" + }, + "example" : [ + 21 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_intron_max", + "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", + "info" : { + "orig_name" : "--alignIntronMax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_mates_gap_max", + "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", + "info" : { + "orig_name" : "--alignMatesGapMax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_sj_overhang_min", + "description" : "minimum overhang (i.e. block size) for spliced alignments", + "info" : { + "orig_name" : "--alignSJoverhangMin" + }, + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_sj_stitch_mismatch_nmax", + "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", + "info" : { + "orig_name" : "--alignSJstitchMismatchNmax" + }, + "example" : [ + 0, + -1, + 0, + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_sjdb_overhang_min", + "description" : "minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments", + "info" : { + "orig_name" : "--alignSJDBoverhangMin" + }, + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_spliced_mate_map_lmin", + "description" : "minimum mapped length for a read mate that is spliced", + "info" : { + "orig_name" : "--alignSplicedMateMapLmin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--align_spliced_mate_map_lmin_over_lmate", + "description" : "alignSplicedMateMapLmin normalized to mate length", + "info" : { + "orig_name" : "--alignSplicedMateMapLminOverLmate" + }, + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_windows_per_read_nmax", + "description" : "max number of windows per read", + "info" : { + "orig_name" : "--alignWindowsPerReadNmax" + }, + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_transcripts_per_window_nmax", + "description" : "max number of transcripts per window", + "info" : { + "orig_name" : "--alignTranscriptsPerWindowNmax" + }, + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--align_transcripts_per_read_nmax", + "description" : "max number of different alignments per read to consider", + "info" : { + "orig_name" : "--alignTranscriptsPerReadNmax" + }, + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--align_ends_type", + "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", + "info" : { + "orig_name" : "--alignEndsType" + }, + "example" : [ + "Local" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--align_ends_protrude", + "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", + "info" : { + "orig_name" : "--alignEndsProtrude" + }, + "example" : [ + "0 ConcordantPair" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--align_soft_clip_at_reference_ends", + "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility with Cufflinks", + "info" : { + "orig_name" : "--alignSoftClipAtReferenceEnds" + }, + "example" : [ + "Yes" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--align_insertion_flush", + "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", + "info" : { + "orig_name" : "--alignInsertionFlush" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Paired-End reads", + "arguments" : [ + { + "type" : "integer", + "name" : "--pe_overlap_nbases_min", + "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merginf of overlapping mates\\" algorithm.", + "info" : { + "orig_name" : "--peOverlapNbasesMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--pe_overlap_mm_p", + "description" : "maximum proportion of mismatched bases in the overlap area", + "info" : { + "orig_name" : "--peOverlapMMp" + }, + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Windows, Anchors, Binning", + "arguments" : [ + { + "type" : "integer", + "name" : "--win_anchor_multimap_nmax", + "description" : "max number of loci anchors are allowed to map to", + "info" : { + "orig_name" : "--winAnchorMultimapNmax" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--win_bin_nbits", + "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", + "info" : { + "orig_name" : "--winBinNbits" + }, + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--win_anchor_dist_nbins", + "description" : "max number of bins between two anchors that allows aggregation of anchors into one window", + "info" : { + "orig_name" : "--winAnchorDistNbins" + }, + "example" : [ + 9 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--win_flank_nbins", + "description" : "log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", + "info" : { + "orig_name" : "--winFlankNbins" + }, + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--win_read_coverage_relative_min", + "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", + "info" : { + "orig_name" : "--winReadCoverageRelativeMin" + }, + "example" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--win_read_coverage_bases_min", + "description" : "minimum number of bases covered by the seeds in a window , for STARlong algorithm only.", + "info" : { + "orig_name" : "--winReadCoverageBasesMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Chimeric Alignments", + "arguments" : [ + { + "type" : "string", + "name" : "--chim_out_type", + "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... soft-clipping in the CIGAR for supplemental chimeric alignments", + "info" : { + "orig_name" : "--chimOutType" + }, + "example" : [ + "Junctions" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_segment_min", + "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", + "info" : { + "orig_name" : "--chimSegmentMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_score_min", + "description" : "minimum total (summed) score of the chimeric segments", + "info" : { + "orig_name" : "--chimScoreMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_score_drop_max", + "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", + "info" : { + "orig_name" : "--chimScoreDropMax" + }, + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_score_separation", + "description" : "minimum difference (separation) between the best chimeric score and the next one", + "info" : { + "orig_name" : "--chimScoreSeparation" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_score_junction_non_gtag", + "description" : "penalty for a non-GT/AG chimeric junction", + "info" : { + "orig_name" : "--chimScoreJunctionNonGTAG" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_junction_overhang_min", + "description" : "minimum overhang for a chimeric junction", + "info" : { + "orig_name" : "--chimJunctionOverhangMin" + }, + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_segment_read_gap_max", + "description" : "maximum gap in the read sequence between chimeric segments", + "info" : { + "orig_name" : "--chimSegmentReadGapMax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--chim_filter", + "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", + "info" : { + "orig_name" : "--chimFilter" + }, + "example" : [ + "banGenomicN" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_main_segment_mult_nmax", + "description" : "maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.", + "info" : { + "orig_name" : "--chimMainSegmentMultNmax" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_multimap_nmax", + "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", + "info" : { + "orig_name" : "--chimMultimapNmax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_multimap_score_range", + "description" : "the score range for multi-mapping chimeras below the best chimeric score. Only works with --chim_multimap_nmax > 1", + "info" : { + "orig_name" : "--chimMultimapScoreRange" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_nonchim_score_drop_min", + "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", + "info" : { + "orig_name" : "--chimNonchimScoreDropMin" + }, + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chim_out_junction_format", + "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", + "info" : { + "orig_name" : "--chimOutJunctionFormat" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Quantification of Annotations", + "arguments" : [ + { + "type" : "string", + "name" : "--quant_mode", + "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", + "info" : { + "orig_name" : "--quantMode" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--quant_transcriptome_bam_compression", + "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", + "info" : { + "orig_name" : "--quantTranscriptomeBAMcompression" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quant_transcriptome_sam_output", + "description" : "alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip ... prohibit indels and single-end alignments, extend softclips - compatible with RSEM\n- BanSingleEnd ... prohibit single-end alignments, allow indels and softclips\n- BanSingleEnd_ExtendSoftclip ... prohibit single-end alignments, extend softclips, allow indels", + "info" : { + "orig_name" : "--quantTranscriptomeSAMoutput" + }, + "example" : [ + "BanSingleEnd_BanIndels_ExtendSoftclip" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "2-pass Mapping", + "arguments" : [ + { + "type" : "string", + "name" : "--twopass_mode", + "description" : "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", + "info" : { + "orig_name" : "--twopassMode" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--twopass1reads_n", + "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", + "info" : { + "orig_name" : "--twopass1readsN" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "WASP parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--wasp_output_mode", + "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", + "info" : { + "orig_name" : "--waspOutputMode" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "STARsolo (single cell RNA-seq) parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--solo_type", + "description" : "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --out_sam_type BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)", + "info" : { + "orig_name" : "--soloType" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_cb_type", + "description" : "cell barcode type\n\nSequence: cell barcode is a sequence (standard option)\nString: cell barcode is an arbitrary string", + "info" : { + "orig_name" : "--soloCBtype" + }, + "example" : [ + "Sequence" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_cb_whitelist", + "description" : "file(s) with whitelist(s) of cell barcodes. Only --solo_type CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed", + "info" : { + "orig_name" : "--soloCBwhitelist" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--solo_cb_start", + "description" : "cell barcode start base", + "info" : { + "orig_name" : "--soloCBstart" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--solo_cb_len", + "description" : "cell barcode length", + "info" : { + "orig_name" : "--soloCBlen" + }, + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--solo_umi_start", + "description" : "UMI start base", + "info" : { + "orig_name" : "--soloUMIstart" + }, + "example" : [ + 17 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--solo_umi_len", + "description" : "UMI length", + "info" : { + "orig_name" : "--soloUMIlen" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--solo_barcode_read_length", + "description" : "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check", + "info" : { + "orig_name" : "--soloBarcodeReadLength" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--solo_barcode_mate", + "description" : "identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2", + "info" : { + "orig_name" : "--soloBarcodeMate" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_cb_position", + "description" : "position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --solo_type CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position 0_0_2_-1 3_1_3_8", + "info" : { + "orig_name" : "--soloCBposition" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_umi_position", + "description" : "position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position 3_9_3_14", + "info" : { + "orig_name" : "--soloUMIposition" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_adapter_sequence", + "description" : "adapter sequence to anchor barcodes. Only one adapter sequence is allowed.", + "info" : { + "orig_name" : "--soloAdapterSequence" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--solo_adapter_mismatches_nmax", + "description" : "maximum number of mismatches allowed in adapter sequence.", + "info" : { + "orig_name" : "--soloAdapterMismatchesNmax" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_cb_match_wl_type", + "description" : "matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --solo_type CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline.", + "info" : { + "orig_name" : "--soloCBmatchWLtype" + }, + "example" : [ + "1MM_multi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_input_sam_attr_barcode_seq", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_seq CR UR .\nThis parameter is required when running STARsolo with input from SAM.", + "info" : { + "orig_name" : "--soloInputSAMattrBarcodeSeq" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_input_sam_attr_barcode_qual", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_qual CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned to all bases.", + "info" : { + "orig_name" : "--soloInputSAMattrBarcodeQual" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_strand", + "description" : "strandedness of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule", + "info" : { + "orig_name" : "--soloStrand" + }, + "example" : [ + "Forward" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_features", + "description" : "genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.", + "info" : { + "orig_name" : "--soloFeatures" + }, + "example" : [ + "Gene" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_multi_mappers", + "description" : "counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm", + "info" : { + "orig_name" : "--soloMultiMappers" + }, + "example" : [ + "Unique" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_umi_dedup", + "description" : "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \\"directional\\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI collapsing.", + "info" : { + "orig_name" : "--soloUMIdedup" + }, + "example" : [ + "1MM_All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_umi_filtering", + "description" : "type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .\nOnly works with --solo_umi_dedup 1MM_CR", + "info" : { + "orig_name" : "--soloUMIfiltering" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_out_file_names", + "description" : "file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", + "info" : { + "orig_name" : "--soloOutFileNames" + }, + "example" : [ + "Solo.out/", + "features.tsv", + "barcodes.tsv", + "matrix.mtx" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_cell_filter", + "description" : "cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000", + "info" : { + "orig_name" : "--soloCellFilter" + }, + "example" : [ + "CellRanger2.2", + "3000", + "0.99", + "10" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_out_format_features_gene_field3", + "description" : "field 3 in the Gene features.tsv file. If \\"-\\", then no 3rd field is output.", + "info" : { + "orig_name" : "--soloOutFormatFeaturesGeneField3" + }, + "example" : [ + "Gene Expression" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--solo_cell_read_stats", + "description" : "Output reads statistics for each CB\n\n- Standard ... standard output", + "info" : { + "orig_name" : "--soloCellReadStats" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "--readFilesIn" + ], + "description" : "The single-end or paired-end R1 FastQ files to be processed.", + "example" : [ + "mysample_S1_L001_R1_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_r2", + "description" : "The paired-end R2 FastQ files to be processed. Only required if --input is a paired-end R1 file.", + "example" : [ + "mysample_S1_L001_R2_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--aligned_reads", + "description" : "The output file containing the aligned reads.", + "example" : [ + "aligned_reads.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reads_per_gene", + "description" : "The output file containing the number of reads per gene.", + "example" : [ + "reads_per_gene.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unmapped", + "description" : "The output file containing the unmapped reads.", + "example" : [ + "unmapped.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unmapped_r2", + "description" : "The output file containing the unmapped R2 reads.", + "example" : [ + "unmapped_r2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chimeric_junctions", + "description" : "The output file containing the chimeric junctions.", + "example" : [ + "chimeric_junctions.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--log", + "description" : "The output file containing the log of the alignment process.", + "example" : [ + "log.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--splice_junctions", + "description" : "The output file containing the splice junctions.", + "example" : [ + "splice_junctions.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reads_aligned_to_transcriptome", + "description" : "The output file containing the alignments to transcriptome in BAM formats. This file is generated when --quantMode is set to TranscriptomeSAM.", + "example" : [ + "transcriptome_aligned.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true + } + ], + "description" : "Aligns reads to a reference genome using STAR.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "align", + "fasta", + "genome" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts635" + ] + }, + "links" : { + "repository" : "https://github.com/alexdobin/STAR", + "documentation" : "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.12-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "gzip", + "bzip2" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" + ], + "env" : [ + "STAR_VERSION 2.7.11b", + "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd" + ] + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "pyyaml" + ], + "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "STAR --version | sed 's#\\\\(.*\\\\)#star: \\"\\\\1\\"#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/star/star_align_reads/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/star/star_align_reads", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.py" +cat > "$tempscript" << VIASHMAIN +import tempfile +import subprocess +import shutil +from pathlib import Path +import yaml + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'run_rng_seed': $( if [ ! -z ${VIASH_PAR_RUN_RNG_SEED+x} ]; then echo "int(r'${VIASH_PAR_RUN_RNG_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'genome_dir': $( if [ ! -z ${VIASH_PAR_GENOME_DIR+x} ]; then echo "r'${VIASH_PAR_GENOME_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'genome_load': $( if [ ! -z ${VIASH_PAR_GENOME_LOAD+x} ]; then echo "r'${VIASH_PAR_GENOME_LOAD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'genome_fasta_files': $( if [ ! -z ${VIASH_PAR_GENOME_FASTA_FILES+x} ]; then echo "r'${VIASH_PAR_GENOME_FASTA_FILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genome_file_sizes': $( if [ ! -z ${VIASH_PAR_GENOME_FILE_SIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOME_FILE_SIZES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'genome_transform_output': $( if [ ! -z ${VIASH_PAR_GENOME_TRANSFORM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOME_TRANSFORM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genome_chr_set_mitochondrial': $( if [ ! -z ${VIASH_PAR_GENOME_CHR_SET_MITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOME_CHR_SET_MITOCHONDRIAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdb_file_chr_start_end': $( if [ ! -z ${VIASH_PAR_SJDB_FILE_CHR_START_END+x} ]; then echo "r'${VIASH_PAR_SJDB_FILE_CHR_START_END//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdb_gtf_file': $( if [ ! -z ${VIASH_PAR_SJDB_GTF_FILE+x} ]; then echo "r'${VIASH_PAR_SJDB_GTF_FILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdb_gtf_chr_prefix': $( if [ ! -z ${VIASH_PAR_SJDB_GTF_CHR_PREFIX+x} ]; then echo "r'${VIASH_PAR_SJDB_GTF_CHR_PREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdb_gtf_feature_exon': $( if [ ! -z ${VIASH_PAR_SJDB_GTF_FEATURE_EXON+x} ]; then echo "r'${VIASH_PAR_SJDB_GTF_FEATURE_EXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdb_gtf_tag_exon_parent_transcript': $( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_TRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_TRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdb_gtf_tag_exon_parent_gene': $( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE+x} ]; then echo "r'${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdb_gtf_tag_exon_parent_gene_name': $( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_NAME+x} ]; then echo "r'${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_NAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdb_gtf_tag_exon_parent_gene_type': $( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_TYPE+x} ]; then echo "r'${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_TYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdb_overhang': $( if [ ! -z ${VIASH_PAR_SJDB_OVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDB_OVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdb_score': $( if [ ! -z ${VIASH_PAR_SJDB_SCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDB_SCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdb_insert_save': $( if [ ! -z ${VIASH_PAR_SJDB_INSERT_SAVE+x} ]; then echo "r'${VIASH_PAR_SJDB_INSERT_SAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_vcf_file': $( if [ ! -z ${VIASH_PAR_VAR_VCF_FILE+x} ]; then echo "r'${VIASH_PAR_VAR_VCF_FILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'read_files_type': $( if [ ! -z ${VIASH_PAR_READ_FILES_TYPE+x} ]; then echo "r'${VIASH_PAR_READ_FILES_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'read_files_sam_attr_keep': $( if [ ! -z ${VIASH_PAR_READ_FILES_SAM_ATTR_KEEP+x} ]; then echo "r'${VIASH_PAR_READ_FILES_SAM_ATTR_KEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'read_files_manifest': $( if [ ! -z ${VIASH_PAR_READ_FILES_MANIFEST+x} ]; then echo "r'${VIASH_PAR_READ_FILES_MANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'read_files_prefix': $( if [ ! -z ${VIASH_PAR_READ_FILES_PREFIX+x} ]; then echo "r'${VIASH_PAR_READ_FILES_PREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'read_files_command': $( if [ ! -z ${VIASH_PAR_READ_FILES_COMMAND+x} ]; then echo "r'${VIASH_PAR_READ_FILES_COMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'read_map_number': $( if [ ! -z ${VIASH_PAR_READ_MAP_NUMBER+x} ]; then echo "int(r'${VIASH_PAR_READ_MAP_NUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'read_mates_lengths_in': $( if [ ! -z ${VIASH_PAR_READ_MATES_LENGTHS_IN+x} ]; then echo "r'${VIASH_PAR_READ_MATES_LENGTHS_IN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'read_name_separator': $( if [ ! -z ${VIASH_PAR_READ_NAME_SEPARATOR+x} ]; then echo "r'${VIASH_PAR_READ_NAME_SEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'read_quality_score_base': $( if [ ! -z ${VIASH_PAR_READ_QUALITY_SCORE_BASE+x} ]; then echo "int(r'${VIASH_PAR_READ_QUALITY_SCORE_BASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'clip_adapter_type': $( if [ ! -z ${VIASH_PAR_CLIP_ADAPTER_TYPE+x} ]; then echo "r'${VIASH_PAR_CLIP_ADAPTER_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'clip3p_nbases': $( if [ ! -z ${VIASH_PAR_CLIP3P_NBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3P_NBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3p_adapter_seq': $( if [ ! -z ${VIASH_PAR_CLIP3P_ADAPTER_SEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3P_ADAPTER_SEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'clip3p_adapter_mm_p': $( if [ ! -z ${VIASH_PAR_CLIP3P_ADAPTER_MM_P+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3P_ADAPTER_MM_P//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3p_after_adapter_nbases': $( if [ ! -z ${VIASH_PAR_CLIP3P_AFTER_ADAPTER_NBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3P_AFTER_ADAPTER_NBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip5p_nbases': $( if [ ! -z ${VIASH_PAR_CLIP5P_NBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5P_NBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limit_genome_generate_ram': $( if [ ! -z ${VIASH_PAR_LIMIT_GENOME_GENERATE_RAM+x} ]; then echo "int(r'${VIASH_PAR_LIMIT_GENOME_GENERATE_RAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limit_io_buffer_size': $( if [ ! -z ${VIASH_PAR_LIMIT_IO_BUFFER_SIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMIT_IO_BUFFER_SIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limit_out_sam_one_read_bytes': $( if [ ! -z ${VIASH_PAR_LIMIT_OUT_SAM_ONE_READ_BYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMIT_OUT_SAM_ONE_READ_BYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limit_out_sj_one_read': $( if [ ! -z ${VIASH_PAR_LIMIT_OUT_SJ_ONE_READ+x} ]; then echo "int(r'${VIASH_PAR_LIMIT_OUT_SJ_ONE_READ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limit_out_sj_collapsed': $( if [ ! -z ${VIASH_PAR_LIMIT_OUT_SJ_COLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMIT_OUT_SJ_COLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limit_bam_sort_ram': $( if [ ! -z ${VIASH_PAR_LIMIT_BAM_SORT_RAM+x} ]; then echo "int(r'${VIASH_PAR_LIMIT_BAM_SORT_RAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limit_sjdb_insert_nsj': $( if [ ! -z ${VIASH_PAR_LIMIT_SJDB_INSERT_NSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMIT_SJDB_INSERT_NSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limit_nreads_soft': $( if [ ! -z ${VIASH_PAR_LIMIT_NREADS_SOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMIT_NREADS_SOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_tmp_keep': $( if [ ! -z ${VIASH_PAR_OUT_TMP_KEEP+x} ]; then echo "r'${VIASH_PAR_OUT_TMP_KEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_std': $( if [ ! -z ${VIASH_PAR_OUT_STD+x} ]; then echo "r'${VIASH_PAR_OUT_STD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_reads_unmapped': $( if [ ! -z ${VIASH_PAR_OUT_READS_UNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUT_READS_UNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_qs_conversion_add': $( if [ ! -z ${VIASH_PAR_OUT_QS_CONVERSION_ADD+x} ]; then echo "int(r'${VIASH_PAR_OUT_QS_CONVERSION_ADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_multimapper_order': $( if [ ! -z ${VIASH_PAR_OUT_MULTIMAPPER_ORDER+x} ]; then echo "r'${VIASH_PAR_OUT_MULTIMAPPER_ORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sam_type': $( if [ ! -z ${VIASH_PAR_OUT_SAM_TYPE+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_TYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_sam_mode': $( if [ ! -z ${VIASH_PAR_OUT_SAM_MODE+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sam_strand_field': $( if [ ! -z ${VIASH_PAR_OUT_SAM_STRAND_FIELD+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_STRAND_FIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sam_attributes': $( if [ ! -z ${VIASH_PAR_OUT_SAM_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_ATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_sam_attr_ih_start': $( if [ ! -z ${VIASH_PAR_OUT_SAM_ATTR_IH_START+x} ]; then echo "int(r'${VIASH_PAR_OUT_SAM_ATTR_IH_START//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_sam_unmapped': $( if [ ! -z ${VIASH_PAR_OUT_SAM_UNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_UNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_sam_order': $( if [ ! -z ${VIASH_PAR_OUT_SAM_ORDER+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_ORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sam_primary_flag': $( if [ ! -z ${VIASH_PAR_OUT_SAM_PRIMARY_FLAG+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_PRIMARY_FLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sam_read_id': $( if [ ! -z ${VIASH_PAR_OUT_SAM_READ_ID+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_READ_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sam_mapq_unique': $( if [ ! -z ${VIASH_PAR_OUT_SAM_MAPQ_UNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUT_SAM_MAPQ_UNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_sam_flag_or': $( if [ ! -z ${VIASH_PAR_OUT_SAM_FLAG_OR+x} ]; then echo "int(r'${VIASH_PAR_OUT_SAM_FLAG_OR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_sam_flag_and': $( if [ ! -z ${VIASH_PAR_OUT_SAM_FLAG_AND+x} ]; then echo "int(r'${VIASH_PAR_OUT_SAM_FLAG_AND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_sam_attr_rg_line': $( if [ ! -z ${VIASH_PAR_OUT_SAM_ATTR_RG_LINE+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_ATTR_RG_LINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_sam_header_hd': $( if [ ! -z ${VIASH_PAR_OUT_SAM_HEADER_HD+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_HEADER_HD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_sam_header_pg': $( if [ ! -z ${VIASH_PAR_OUT_SAM_HEADER_PG+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_HEADER_PG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_sam_header_comment_file': $( if [ ! -z ${VIASH_PAR_OUT_SAM_HEADER_COMMENT_FILE+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_HEADER_COMMENT_FILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sam_filter': $( if [ ! -z ${VIASH_PAR_OUT_SAM_FILTER+x} ]; then echo "r'${VIASH_PAR_OUT_SAM_FILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_sam_mult_nmax': $( if [ ! -z ${VIASH_PAR_OUT_SAM_MULT_NMAX+x} ]; then echo "int(r'${VIASH_PAR_OUT_SAM_MULT_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_sam_tlen': $( if [ ! -z ${VIASH_PAR_OUT_SAM_TLEN+x} ]; then echo "int(r'${VIASH_PAR_OUT_SAM_TLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_bam_compression': $( if [ ! -z ${VIASH_PAR_OUT_BAM_COMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUT_BAM_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_bam_sorting_thread_n': $( if [ ! -z ${VIASH_PAR_OUT_BAM_SORTING_THREAD_N+x} ]; then echo "int(r'${VIASH_PAR_OUT_BAM_SORTING_THREAD_N//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_bam_sorting_bins_n': $( if [ ! -z ${VIASH_PAR_OUT_BAM_SORTING_BINS_N+x} ]; then echo "int(r'${VIASH_PAR_OUT_BAM_SORTING_BINS_N//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'bam_remove_duplicates_type': $( if [ ! -z ${VIASH_PAR_BAM_REMOVE_DUPLICATES_TYPE+x} ]; then echo "r'${VIASH_PAR_BAM_REMOVE_DUPLICATES_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'bam_remove_duplicates_mate2bases_n': $( if [ ! -z ${VIASH_PAR_BAM_REMOVE_DUPLICATES_MATE2BASES_N+x} ]; then echo "int(r'${VIASH_PAR_BAM_REMOVE_DUPLICATES_MATE2BASES_N//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_wig_type': $( if [ ! -z ${VIASH_PAR_OUT_WIG_TYPE+x} ]; then echo "r'${VIASH_PAR_OUT_WIG_TYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'out_wig_strand': $( if [ ! -z ${VIASH_PAR_OUT_WIG_STRAND+x} ]; then echo "r'${VIASH_PAR_OUT_WIG_STRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_wig_references_prefix': $( if [ ! -z ${VIASH_PAR_OUT_WIG_REFERENCES_PREFIX+x} ]; then echo "r'${VIASH_PAR_OUT_WIG_REFERENCES_PREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_wig_norm': $( if [ ! -z ${VIASH_PAR_OUT_WIG_NORM+x} ]; then echo "r'${VIASH_PAR_OUT_WIG_NORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_filter_type': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_TYPE+x} ]; then echo "r'${VIASH_PAR_OUT_FILTER_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_filter_multimap_score_range': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_MULTIMAP_SCORE_RANGE+x} ]; then echo "int(r'${VIASH_PAR_OUT_FILTER_MULTIMAP_SCORE_RANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_multimap_nmax': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_MULTIMAP_NMAX+x} ]; then echo "int(r'${VIASH_PAR_OUT_FILTER_MULTIMAP_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_mismatch_nmax': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_MISMATCH_NMAX+x} ]; then echo "int(r'${VIASH_PAR_OUT_FILTER_MISMATCH_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_mismatch_nover_lmax': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_MISMATCH_NOVER_LMAX+x} ]; then echo "float(r'${VIASH_PAR_OUT_FILTER_MISMATCH_NOVER_LMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_mismatch_nover_read_lmax': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_MISMATCH_NOVER_READ_LMAX+x} ]; then echo "float(r'${VIASH_PAR_OUT_FILTER_MISMATCH_NOVER_READ_LMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_score_min': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_SCORE_MIN+x} ]; then echo "int(r'${VIASH_PAR_OUT_FILTER_SCORE_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_score_min_over_lread': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_SCORE_MIN_OVER_LREAD+x} ]; then echo "float(r'${VIASH_PAR_OUT_FILTER_SCORE_MIN_OVER_LREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_match_nmin': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_MATCH_NMIN+x} ]; then echo "int(r'${VIASH_PAR_OUT_FILTER_MATCH_NMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_match_nmin_over_lread': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_MATCH_NMIN_OVER_LREAD+x} ]; then echo "float(r'${VIASH_PAR_OUT_FILTER_MATCH_NMIN_OVER_LREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'out_filter_intron_motifs': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_INTRON_MOTIFS+x} ]; then echo "r'${VIASH_PAR_OUT_FILTER_INTRON_MOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_filter_intron_strands': $( if [ ! -z ${VIASH_PAR_OUT_FILTER_INTRON_STRANDS+x} ]; then echo "r'${VIASH_PAR_OUT_FILTER_INTRON_STRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sj_type': $( if [ ! -z ${VIASH_PAR_OUT_SJ_TYPE+x} ]; then echo "r'${VIASH_PAR_OUT_SJ_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sj_filter_reads': $( if [ ! -z ${VIASH_PAR_OUT_SJ_FILTER_READS+x} ]; then echo "r'${VIASH_PAR_OUT_SJ_FILTER_READS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'out_sj_filter_overhang_min': $( if [ ! -z ${VIASH_PAR_OUT_SJ_FILTER_OVERHANG_MIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUT_SJ_FILTER_OVERHANG_MIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'out_sj_filter_count_unique_min': $( if [ ! -z ${VIASH_PAR_OUT_SJ_FILTER_COUNT_UNIQUE_MIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUT_SJ_FILTER_COUNT_UNIQUE_MIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'out_sj_filter_count_total_min': $( if [ ! -z ${VIASH_PAR_OUT_SJ_FILTER_COUNT_TOTAL_MIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUT_SJ_FILTER_COUNT_TOTAL_MIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'out_sj_filter_dist_to_other_sj_min': $( if [ ! -z ${VIASH_PAR_OUT_SJ_FILTER_DIST_TO_OTHER_SJ_MIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUT_SJ_FILTER_DIST_TO_OTHER_SJ_MIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'out_sj_filter_intron_max_vs_read_n': $( if [ ! -z ${VIASH_PAR_OUT_SJ_FILTER_INTRON_MAX_VS_READ_N+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUT_SJ_FILTER_INTRON_MAX_VS_READ_N//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'score_gap': $( if [ ! -z ${VIASH_PAR_SCORE_GAP+x} ]; then echo "int(r'${VIASH_PAR_SCORE_GAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_gap_noncan': $( if [ ! -z ${VIASH_PAR_SCORE_GAP_NONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCORE_GAP_NONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_gap_gcag': $( if [ ! -z ${VIASH_PAR_SCORE_GAP_GCAG+x} ]; then echo "int(r'${VIASH_PAR_SCORE_GAP_GCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_gap_atac': $( if [ ! -z ${VIASH_PAR_SCORE_GAP_ATAC+x} ]; then echo "int(r'${VIASH_PAR_SCORE_GAP_ATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_genomic_length_log2scale': $( if [ ! -z ${VIASH_PAR_SCORE_GENOMIC_LENGTH_LOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCORE_GENOMIC_LENGTH_LOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_del_open': $( if [ ! -z ${VIASH_PAR_SCORE_DEL_OPEN+x} ]; then echo "int(r'${VIASH_PAR_SCORE_DEL_OPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_del_base': $( if [ ! -z ${VIASH_PAR_SCORE_DEL_BASE+x} ]; then echo "int(r'${VIASH_PAR_SCORE_DEL_BASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_ins_open': $( if [ ! -z ${VIASH_PAR_SCORE_INS_OPEN+x} ]; then echo "int(r'${VIASH_PAR_SCORE_INS_OPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_ins_base': $( if [ ! -z ${VIASH_PAR_SCORE_INS_BASE+x} ]; then echo "int(r'${VIASH_PAR_SCORE_INS_BASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'score_stitch_sj_shift': $( if [ ! -z ${VIASH_PAR_SCORE_STITCH_SJ_SHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORE_STITCH_SJ_SHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_search_start_lmax': $( if [ ! -z ${VIASH_PAR_SEED_SEARCH_START_LMAX+x} ]; then echo "int(r'${VIASH_PAR_SEED_SEARCH_START_LMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_search_start_lmax_over_lread': $( if [ ! -z ${VIASH_PAR_SEED_SEARCH_START_LMAX_OVER_LREAD+x} ]; then echo "float(r'${VIASH_PAR_SEED_SEARCH_START_LMAX_OVER_LREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_search_lmax': $( if [ ! -z ${VIASH_PAR_SEED_SEARCH_LMAX+x} ]; then echo "int(r'${VIASH_PAR_SEED_SEARCH_LMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_multimap_nmax': $( if [ ! -z ${VIASH_PAR_SEED_MULTIMAP_NMAX+x} ]; then echo "int(r'${VIASH_PAR_SEED_MULTIMAP_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_per_read_nmax': $( if [ ! -z ${VIASH_PAR_SEED_PER_READ_NMAX+x} ]; then echo "int(r'${VIASH_PAR_SEED_PER_READ_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_per_window_nmax': $( if [ ! -z ${VIASH_PAR_SEED_PER_WINDOW_NMAX+x} ]; then echo "int(r'${VIASH_PAR_SEED_PER_WINDOW_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_none_loci_per_window': $( if [ ! -z ${VIASH_PAR_SEED_NONE_LOCI_PER_WINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEED_NONE_LOCI_PER_WINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_split_min': $( if [ ! -z ${VIASH_PAR_SEED_SPLIT_MIN+x} ]; then echo "int(r'${VIASH_PAR_SEED_SPLIT_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed_map_min': $( if [ ! -z ${VIASH_PAR_SEED_MAP_MIN+x} ]; then echo "int(r'${VIASH_PAR_SEED_MAP_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_intron_min': $( if [ ! -z ${VIASH_PAR_ALIGN_INTRON_MIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_INTRON_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_intron_max': $( if [ ! -z ${VIASH_PAR_ALIGN_INTRON_MAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_INTRON_MAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_mates_gap_max': $( if [ ! -z ${VIASH_PAR_ALIGN_MATES_GAP_MAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_MATES_GAP_MAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_sj_overhang_min': $( if [ ! -z ${VIASH_PAR_ALIGN_SJ_OVERHANG_MIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_SJ_OVERHANG_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_sj_stitch_mismatch_nmax': $( if [ ! -z ${VIASH_PAR_ALIGN_SJ_STITCH_MISMATCH_NMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGN_SJ_STITCH_MISMATCH_NMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'align_sjdb_overhang_min': $( if [ ! -z ${VIASH_PAR_ALIGN_SJDB_OVERHANG_MIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_SJDB_OVERHANG_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_spliced_mate_map_lmin': $( if [ ! -z ${VIASH_PAR_ALIGN_SPLICED_MATE_MAP_LMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_SPLICED_MATE_MAP_LMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_spliced_mate_map_lmin_over_lmate': $( if [ ! -z ${VIASH_PAR_ALIGN_SPLICED_MATE_MAP_LMIN_OVER_LMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGN_SPLICED_MATE_MAP_LMIN_OVER_LMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_windows_per_read_nmax': $( if [ ! -z ${VIASH_PAR_ALIGN_WINDOWS_PER_READ_NMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_WINDOWS_PER_READ_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_transcripts_per_window_nmax': $( if [ ! -z ${VIASH_PAR_ALIGN_TRANSCRIPTS_PER_WINDOW_NMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_TRANSCRIPTS_PER_WINDOW_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_transcripts_per_read_nmax': $( if [ ! -z ${VIASH_PAR_ALIGN_TRANSCRIPTS_PER_READ_NMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGN_TRANSCRIPTS_PER_READ_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'align_ends_type': $( if [ ! -z ${VIASH_PAR_ALIGN_ENDS_TYPE+x} ]; then echo "r'${VIASH_PAR_ALIGN_ENDS_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'align_ends_protrude': $( if [ ! -z ${VIASH_PAR_ALIGN_ENDS_PROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGN_ENDS_PROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'align_soft_clip_at_reference_ends': $( if [ ! -z ${VIASH_PAR_ALIGN_SOFT_CLIP_AT_REFERENCE_ENDS+x} ]; then echo "r'${VIASH_PAR_ALIGN_SOFT_CLIP_AT_REFERENCE_ENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'align_insertion_flush': $( if [ ! -z ${VIASH_PAR_ALIGN_INSERTION_FLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGN_INSERTION_FLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'pe_overlap_nbases_min': $( if [ ! -z ${VIASH_PAR_PE_OVERLAP_NBASES_MIN+x} ]; then echo "int(r'${VIASH_PAR_PE_OVERLAP_NBASES_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'pe_overlap_mm_p': $( if [ ! -z ${VIASH_PAR_PE_OVERLAP_MM_P+x} ]; then echo "float(r'${VIASH_PAR_PE_OVERLAP_MM_P//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'win_anchor_multimap_nmax': $( if [ ! -z ${VIASH_PAR_WIN_ANCHOR_MULTIMAP_NMAX+x} ]; then echo "int(r'${VIASH_PAR_WIN_ANCHOR_MULTIMAP_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'win_bin_nbits': $( if [ ! -z ${VIASH_PAR_WIN_BIN_NBITS+x} ]; then echo "int(r'${VIASH_PAR_WIN_BIN_NBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'win_anchor_dist_nbins': $( if [ ! -z ${VIASH_PAR_WIN_ANCHOR_DIST_NBINS+x} ]; then echo "int(r'${VIASH_PAR_WIN_ANCHOR_DIST_NBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'win_flank_nbins': $( if [ ! -z ${VIASH_PAR_WIN_FLANK_NBINS+x} ]; then echo "int(r'${VIASH_PAR_WIN_FLANK_NBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'win_read_coverage_relative_min': $( if [ ! -z ${VIASH_PAR_WIN_READ_COVERAGE_RELATIVE_MIN+x} ]; then echo "float(r'${VIASH_PAR_WIN_READ_COVERAGE_RELATIVE_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'win_read_coverage_bases_min': $( if [ ! -z ${VIASH_PAR_WIN_READ_COVERAGE_BASES_MIN+x} ]; then echo "int(r'${VIASH_PAR_WIN_READ_COVERAGE_BASES_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_out_type': $( if [ ! -z ${VIASH_PAR_CHIM_OUT_TYPE+x} ]; then echo "r'${VIASH_PAR_CHIM_OUT_TYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chim_segment_min': $( if [ ! -z ${VIASH_PAR_CHIM_SEGMENT_MIN+x} ]; then echo "int(r'${VIASH_PAR_CHIM_SEGMENT_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_score_min': $( if [ ! -z ${VIASH_PAR_CHIM_SCORE_MIN+x} ]; then echo "int(r'${VIASH_PAR_CHIM_SCORE_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_score_drop_max': $( if [ ! -z ${VIASH_PAR_CHIM_SCORE_DROP_MAX+x} ]; then echo "int(r'${VIASH_PAR_CHIM_SCORE_DROP_MAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_score_separation': $( if [ ! -z ${VIASH_PAR_CHIM_SCORE_SEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIM_SCORE_SEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_score_junction_non_gtag': $( if [ ! -z ${VIASH_PAR_CHIM_SCORE_JUNCTION_NON_GTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIM_SCORE_JUNCTION_NON_GTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_junction_overhang_min': $( if [ ! -z ${VIASH_PAR_CHIM_JUNCTION_OVERHANG_MIN+x} ]; then echo "int(r'${VIASH_PAR_CHIM_JUNCTION_OVERHANG_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_segment_read_gap_max': $( if [ ! -z ${VIASH_PAR_CHIM_SEGMENT_READ_GAP_MAX+x} ]; then echo "int(r'${VIASH_PAR_CHIM_SEGMENT_READ_GAP_MAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_filter': $( if [ ! -z ${VIASH_PAR_CHIM_FILTER+x} ]; then echo "r'${VIASH_PAR_CHIM_FILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chim_main_segment_mult_nmax': $( if [ ! -z ${VIASH_PAR_CHIM_MAIN_SEGMENT_MULT_NMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIM_MAIN_SEGMENT_MULT_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_multimap_nmax': $( if [ ! -z ${VIASH_PAR_CHIM_MULTIMAP_NMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIM_MULTIMAP_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_multimap_score_range': $( if [ ! -z ${VIASH_PAR_CHIM_MULTIMAP_SCORE_RANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIM_MULTIMAP_SCORE_RANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_nonchim_score_drop_min': $( if [ ! -z ${VIASH_PAR_CHIM_NONCHIM_SCORE_DROP_MIN+x} ]; then echo "int(r'${VIASH_PAR_CHIM_NONCHIM_SCORE_DROP_MIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chim_out_junction_format': $( if [ ! -z ${VIASH_PAR_CHIM_OUT_JUNCTION_FORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIM_OUT_JUNCTION_FORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quant_mode': $( if [ ! -z ${VIASH_PAR_QUANT_MODE+x} ]; then echo "r'${VIASH_PAR_QUANT_MODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'quant_transcriptome_bam_compression': $( if [ ! -z ${VIASH_PAR_QUANT_TRANSCRIPTOME_BAM_COMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANT_TRANSCRIPTOME_BAM_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quant_transcriptome_sam_output': $( if [ ! -z ${VIASH_PAR_QUANT_TRANSCRIPTOME_SAM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_QUANT_TRANSCRIPTOME_SAM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopass_mode': $( if [ ! -z ${VIASH_PAR_TWOPASS_MODE+x} ]; then echo "r'${VIASH_PAR_TWOPASS_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopass1reads_n': $( if [ ! -z ${VIASH_PAR_TWOPASS1READS_N+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READS_N//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'wasp_output_mode': $( if [ ! -z ${VIASH_PAR_WASP_OUTPUT_MODE+x} ]; then echo "r'${VIASH_PAR_WASP_OUTPUT_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'solo_type': $( if [ ! -z ${VIASH_PAR_SOLO_TYPE+x} ]; then echo "r'${VIASH_PAR_SOLO_TYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_cb_type': $( if [ ! -z ${VIASH_PAR_SOLO_CB_TYPE+x} ]; then echo "r'${VIASH_PAR_SOLO_CB_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'solo_cb_whitelist': $( if [ ! -z ${VIASH_PAR_SOLO_CB_WHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLO_CB_WHITELIST//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_cb_start': $( if [ ! -z ${VIASH_PAR_SOLO_CB_START+x} ]; then echo "int(r'${VIASH_PAR_SOLO_CB_START//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'solo_cb_len': $( if [ ! -z ${VIASH_PAR_SOLO_CB_LEN+x} ]; then echo "int(r'${VIASH_PAR_SOLO_CB_LEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'solo_umi_start': $( if [ ! -z ${VIASH_PAR_SOLO_UMI_START+x} ]; then echo "int(r'${VIASH_PAR_SOLO_UMI_START//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'solo_umi_len': $( if [ ! -z ${VIASH_PAR_SOLO_UMI_LEN+x} ]; then echo "int(r'${VIASH_PAR_SOLO_UMI_LEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'solo_barcode_read_length': $( if [ ! -z ${VIASH_PAR_SOLO_BARCODE_READ_LENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLO_BARCODE_READ_LENGTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'solo_barcode_mate': $( if [ ! -z ${VIASH_PAR_SOLO_BARCODE_MATE+x} ]; then echo "int(r'${VIASH_PAR_SOLO_BARCODE_MATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'solo_cb_position': $( if [ ! -z ${VIASH_PAR_SOLO_CB_POSITION+x} ]; then echo "r'${VIASH_PAR_SOLO_CB_POSITION//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_umi_position': $( if [ ! -z ${VIASH_PAR_SOLO_UMI_POSITION+x} ]; then echo "r'${VIASH_PAR_SOLO_UMI_POSITION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'solo_adapter_sequence': $( if [ ! -z ${VIASH_PAR_SOLO_ADAPTER_SEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLO_ADAPTER_SEQUENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'solo_adapter_mismatches_nmax': $( if [ ! -z ${VIASH_PAR_SOLO_ADAPTER_MISMATCHES_NMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLO_ADAPTER_MISMATCHES_NMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'solo_cb_match_wl_type': $( if [ ! -z ${VIASH_PAR_SOLO_CB_MATCH_WL_TYPE+x} ]; then echo "r'${VIASH_PAR_SOLO_CB_MATCH_WL_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'solo_input_sam_attr_barcode_seq': $( if [ ! -z ${VIASH_PAR_SOLO_INPUT_SAM_ATTR_BARCODE_SEQ+x} ]; then echo "r'${VIASH_PAR_SOLO_INPUT_SAM_ATTR_BARCODE_SEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_input_sam_attr_barcode_qual': $( if [ ! -z ${VIASH_PAR_SOLO_INPUT_SAM_ATTR_BARCODE_QUAL+x} ]; then echo "r'${VIASH_PAR_SOLO_INPUT_SAM_ATTR_BARCODE_QUAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_strand': $( if [ ! -z ${VIASH_PAR_SOLO_STRAND+x} ]; then echo "r'${VIASH_PAR_SOLO_STRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'solo_features': $( if [ ! -z ${VIASH_PAR_SOLO_FEATURES+x} ]; then echo "r'${VIASH_PAR_SOLO_FEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_multi_mappers': $( if [ ! -z ${VIASH_PAR_SOLO_MULTI_MAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLO_MULTI_MAPPERS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_umi_dedup': $( if [ ! -z ${VIASH_PAR_SOLO_UMI_DEDUP+x} ]; then echo "r'${VIASH_PAR_SOLO_UMI_DEDUP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_umi_filtering': $( if [ ! -z ${VIASH_PAR_SOLO_UMI_FILTERING+x} ]; then echo "r'${VIASH_PAR_SOLO_UMI_FILTERING//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_out_file_names': $( if [ ! -z ${VIASH_PAR_SOLO_OUT_FILE_NAMES+x} ]; then echo "r'${VIASH_PAR_SOLO_OUT_FILE_NAMES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_cell_filter': $( if [ ! -z ${VIASH_PAR_SOLO_CELL_FILTER+x} ]; then echo "r'${VIASH_PAR_SOLO_CELL_FILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_out_format_features_gene_field3': $( if [ ! -z ${VIASH_PAR_SOLO_OUT_FORMAT_FEATURES_GENE_FIELD3+x} ]; then echo "r'${VIASH_PAR_SOLO_OUT_FORMAT_FEATURES_GENE_FIELD3//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'solo_cell_read_stats': $( if [ ! -z ${VIASH_PAR_SOLO_CELL_READ_STATS+x} ]; then echo "r'${VIASH_PAR_SOLO_CELL_READ_STATS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'input_r2': $( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "r'${VIASH_PAR_INPUT_R2//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'aligned_reads': $( if [ ! -z ${VIASH_PAR_ALIGNED_READS+x} ]; then echo "r'${VIASH_PAR_ALIGNED_READS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reads_per_gene': $( if [ ! -z ${VIASH_PAR_READS_PER_GENE+x} ]; then echo "r'${VIASH_PAR_READS_PER_GENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'unmapped': $( if [ ! -z ${VIASH_PAR_UNMAPPED+x} ]; then echo "r'${VIASH_PAR_UNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'unmapped_r2': $( if [ ! -z ${VIASH_PAR_UNMAPPED_R2+x} ]; then echo "r'${VIASH_PAR_UNMAPPED_R2//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'chimeric_junctions': $( if [ ! -z ${VIASH_PAR_CHIMERIC_JUNCTIONS+x} ]; then echo "r'${VIASH_PAR_CHIMERIC_JUNCTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'log': $( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "r'${VIASH_PAR_LOG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'splice_junctions': $( if [ ! -z ${VIASH_PAR_SPLICE_JUNCTIONS+x} ]; then echo "r'${VIASH_PAR_SPLICE_JUNCTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reads_aligned_to_transcriptome': $( if [ ! -z ${VIASH_PAR_READS_ALIGNED_TO_TRANSCRIPTOME+x} ]; then echo "r'${VIASH_PAR_READS_ALIGNED_TO_TRANSCRIPTOME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END + +# read config +with open(meta["config"], 'r') as stream: + config = yaml.safe_load(stream) +all_arguments = { + arg["name"].lstrip('-'): arg + for argument_group in config["argument_groups"] + for arg in argument_group["arguments"] +} + +################################################## +# check and process SE / PE R1 input files +input_r1 = par["input"] +readFilesIn = ",".join(par["input"]) +par["input"] = None + +# check and process PE R2 input files +input_r2 = par["input_r2"] +if input_r2 is not None: + if len(input_r1) != len(input_r2): + raise ValueError("The number of R1 and R2 files do not match.") + readFilesIn = [readFilesIn, ",".join(par["input_r2"])] + par["input_r2"] = None + +# store readFilesIn +par["readFilesIn"] = readFilesIn + +################################################## + +# determine readFilesCommand +if input_r1[0].endswith(".gz"): + print(">> Input files are gzipped, setting readFilesCommand to zcat", flush=True) + par["readFilesCommand"] = "zcat" +elif input_r1[0].endswith(".bz2"): + print(">> Input files are bzipped, setting readFilesCommand to bzcat", flush=True) + par["readFilesCommand"] = "bzcat" + +################################################## +# store output paths +expected_outputs = { + "aligned_reads": ["Aligned.out.sam", "Aligned.out.bam"], + "reads_per_gene": "ReadsPerGene.out.tab", + "chimeric_junctions": "Chimeric.out.junction", + "log": "Log.final.out", + "splice_junctions": "SJ.out.tab", + "unmapped": "Unmapped.out.mate1", + "unmapped_r2": "Unmapped.out.mate2", + "reads_aligned_to_transcriptome": "Aligned.toTranscriptome.out.bam" +} +output_paths = {name: par[name] for name in expected_outputs.keys()} +for name in expected_outputs.keys(): + par[name] = None + +################################################## +# process other args +par["runMode"] = "alignReads" + +if "cpus" in meta and meta["cpus"]: + par["runThreadN"] = meta["cpus"] + +################################################## +# run STAR and move output to final destination +with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: + print(">> Constructing command", flush=True) + + # set output paths + temp_dir = Path(temp_dir) + par["outTmpDir"] = temp_dir / "tempdir" + out_dir = temp_dir / "out" + par["outFileNamePrefix"] = f"{out_dir}/" # star needs this slash + + # construct command + cmd_args = [ "STAR" ] + for name, value in par.items(): + if value is not None: + if name in all_arguments: + arg_info = all_arguments[name].get("info", {}) + cli_name = arg_info.get("orig_name", f"--{name}") + else: + cli_name = f"--{name}" + val_to_add = value if isinstance(value, list) else [value] + cmd_args.extend([cli_name] + [str(x) for x in val_to_add]) + print("", flush=True) + + # run command + print(">> Running STAR with command:", flush=True) + print(f"+ {' '.join(cmd_args)}", end="\\\\n\\\\n", flush=True) + subprocess.run( + cmd_args, + check=True + ) + print(">> STAR finished successfully", end="\\\\n\\\\n", flush=True) + + # move output to final destination + print(">> Moving output to final destination", flush=True) + for name, paths in expected_outputs.items(): + for expected_path in [paths] if isinstance(paths, str) else paths: + expected_full_path = out_dir / expected_path + if output_paths[name] and expected_full_path.is_file(): + print(f">> Moving {expected_path} to {output_paths[name]}", flush=True) + shutil.move(expected_full_path, output_paths[name]) +VIASHMAIN +python -B "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/star/star_align_reads", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow.config new file mode 100644 index 0000000..6395c22 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'star/star_align_reads' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Aligns reads to a reference genome using STAR.\n' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow_schema.json new file mode 100644 index 0000000..b839aa8 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/nextflow_schema.json @@ -0,0 +1,2314 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "star_align_reads", +"description": "Aligns reads to a reference genome using STAR.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz`, multiple_sep: `\";\"`. The single-end or paired-end R1 FastQ files to be processed", + "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz`, multiple_sep: `\";\"`. The single-end or paired-end R1 FastQ files to be processed." + + } + + + , + "input_r2": { + "type": + "string", + "description": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The paired-end R2 FastQ files to be processed", + "help_text": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The paired-end R2 FastQ files to be processed. Only required if --input is a paired-end R1 file." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "aligned_reads": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.aligned_reads.bam`, example: `aligned_reads.bam`. The output file containing the aligned reads", + "help_text": "Type: `file`, required, default: `$id.$key.aligned_reads.bam`, example: `aligned_reads.bam`. The output file containing the aligned reads." + , + "default":"$id.$key.aligned_reads.bam" + } + + + , + "reads_per_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reads_per_gene.tsv`, example: `reads_per_gene.tsv`. The output file containing the number of reads per gene", + "help_text": "Type: `file`, default: `$id.$key.reads_per_gene.tsv`, example: `reads_per_gene.tsv`. The output file containing the number of reads per gene." + , + "default":"$id.$key.reads_per_gene.tsv" + } + + + , + "unmapped": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unmapped.fastq`, example: `unmapped.fastq`. The output file containing the unmapped reads", + "help_text": "Type: `file`, default: `$id.$key.unmapped.fastq`, example: `unmapped.fastq`. The output file containing the unmapped reads." + , + "default":"$id.$key.unmapped.fastq" + } + + + , + "unmapped_r2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unmapped_r2.fastq`, example: `unmapped_r2.fastq`. The output file containing the unmapped R2 reads", + "help_text": "Type: `file`, default: `$id.$key.unmapped_r2.fastq`, example: `unmapped_r2.fastq`. The output file containing the unmapped R2 reads." + , + "default":"$id.$key.unmapped_r2.fastq" + } + + + , + "chimeric_junctions": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.chimeric_junctions.tsv`, example: `chimeric_junctions.tsv`. The output file containing the chimeric junctions", + "help_text": "Type: `file`, default: `$id.$key.chimeric_junctions.tsv`, example: `chimeric_junctions.tsv`. The output file containing the chimeric junctions." + , + "default":"$id.$key.chimeric_junctions.tsv" + } + + + , + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log.txt`, example: `log.txt`. The output file containing the log of the alignment process", + "help_text": "Type: `file`, default: `$id.$key.log.txt`, example: `log.txt`. The output file containing the log of the alignment process." + , + "default":"$id.$key.log.txt" + } + + + , + "splice_junctions": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.splice_junctions.tsv`, example: `splice_junctions.tsv`. The output file containing the splice junctions", + "help_text": "Type: `file`, default: `$id.$key.splice_junctions.tsv`, example: `splice_junctions.tsv`. The output file containing the splice junctions." + , + "default":"$id.$key.splice_junctions.tsv" + } + + + , + "reads_aligned_to_transcriptome": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reads_aligned_to_transcriptome.bam`, example: `transcriptome_aligned.bam`. The output file containing the alignments to transcriptome in BAM formats", + "help_text": "Type: `file`, default: `$id.$key.reads_aligned_to_transcriptome.bam`, example: `transcriptome_aligned.bam`. The output file containing the alignments to transcriptome in BAM formats. This file is generated when --quantMode is set to TranscriptomeSAM." + , + "default":"$id.$key.reads_aligned_to_transcriptome.bam" + } + + +} +}, + + + "run parameters" : { + "title": "Run Parameters", + "type": "object", + "description": "No description", + "properties": { + + + "run_rng_seed": { + "type": + "integer", + "description": "Type: `integer`, example: `777`. random number generator seed", + "help_text": "Type: `integer`, example: `777`. random number generator seed." + + } + + +} +}, + + + "genome parameters" : { + "title": "Genome Parameters", + "type": "object", + "description": "No description", + "properties": { + + + "genome_dir": { + "type": + "string", + "description": "Type: `file`, required, example: `./GenomeDir`. path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode generateGenome)", + "help_text": "Type: `file`, required, example: `./GenomeDir`. path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode generateGenome)" + + } + + + , + "genome_load": { + "type": + "string", + "description": "Type: `string`, example: `NoSharedMemory`. mode of shared memory usage for the genome files", + "help_text": "Type: `string`, example: `NoSharedMemory`. mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome" + + } + + + , + "genome_fasta_files": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. path(s) to the fasta files with the genome sequences, separated by spaces", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins)." + + } + + + , + "genome_file_sizes": { + "type": + "string", + "description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. genome files exact sizes in bytes", + "help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. genome files exact sizes in bytes. Typically, this should not be defined by the user." + + } + + + , + "genome_transform_output": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. which output to transform back to original genome\n\n- SAM ", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- Quant ... quantifications (from --quant_mode option)\n- None ... no transformation of the output" + + } + + + , + "genome_chr_set_mitochondrial": { + "type": + "string", + "description": "Type: List of `string`, example: `chrM;M;MT`, multiple_sep: `\";\"`. names of the mitochondrial chromosomes", + "help_text": "Type: List of `string`, example: `chrM;M;MT`, multiple_sep: `\";\"`. names of the mitochondrial chromosomes. Presently only used for STARsolo statistics output/" + + } + + +} +}, + + + "splice junctions database" : { + "title": "Splice Junctions Database", + "type": "object", + "description": "No description", + "properties": { + + + "sjdb_file_chr_start_end": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. path to the files with genomic coordinates (chr \u003ctab\u003e start \u003ctab\u003e end \u003ctab\u003e strand) for the splice junction introns", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. path to the files with genomic coordinates (chr \u003ctab\u003e start \u003ctab\u003e end \u003ctab\u003e strand) for the splice junction introns. Multiple files can be supplied and will be concatenated." + + } + + + , + "sjdb_gtf_file": { + "type": + "string", + "description": "Type: `file`. path to the GTF file with annotations", + "help_text": "Type: `file`. path to the GTF file with annotations" + + } + + + , + "sjdb_gtf_chr_prefix": { + "type": + "string", + "description": "Type: `string`. prefix for chromosome names in a GTF file (e", + "help_text": "Type: `string`. prefix for chromosome names in a GTF file (e.g. \u0027chr\u0027 for using ENSMEBL annotations with UCSC genomes)" + + } + + + , + "sjdb_gtf_feature_exon": { + "type": + "string", + "description": "Type: `string`, example: `exon`. feature type in GTF file to be used as exons for building transcripts", + "help_text": "Type: `string`, example: `exon`. feature type in GTF file to be used as exons for building transcripts" + + } + + + , + "sjdb_gtf_tag_exon_parent_transcript": { + "type": + "string", + "description": "Type: `string`, example: `transcript_id`. GTF attribute name for parent transcript ID (default \"transcript_id\" works for GTF files)", + "help_text": "Type: `string`, example: `transcript_id`. GTF attribute name for parent transcript ID (default \"transcript_id\" works for GTF files)" + + } + + + , + "sjdb_gtf_tag_exon_parent_gene": { + "type": + "string", + "description": "Type: `string`, example: `gene_id`. GTF attribute name for parent gene ID (default \"gene_id\" works for GTF files)", + "help_text": "Type: `string`, example: `gene_id`. GTF attribute name for parent gene ID (default \"gene_id\" works for GTF files)" + + } + + + , + "sjdb_gtf_tag_exon_parent_gene_name": { + "type": + "string", + "description": "Type: List of `string`, example: `gene_name`, multiple_sep: `\";\"`. GTF attribute name for parent gene name", + "help_text": "Type: List of `string`, example: `gene_name`, multiple_sep: `\";\"`. GTF attribute name for parent gene name" + + } + + + , + "sjdb_gtf_tag_exon_parent_gene_type": { + "type": + "string", + "description": "Type: List of `string`, example: `gene_type;gene_biotype`, multiple_sep: `\";\"`. GTF attribute name for parent gene type", + "help_text": "Type: List of `string`, example: `gene_type;gene_biotype`, multiple_sep: `\";\"`. GTF attribute name for parent gene type" + + } + + + , + "sjdb_overhang": { + "type": + "integer", + "description": "Type: `integer`, example: `100`. length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "help_text": "Type: `integer`, example: `100`. length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)" + + } + + + , + "sjdb_score": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. extra alignment score for alignments that cross database junctions", + "help_text": "Type: `integer`, example: `2`. extra alignment score for alignments that cross database junctions" + + } + + + , + "sjdb_insert_save": { + "type": + "string", + "description": "Type: `string`, example: `Basic`. which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ", + "help_text": "Type: `string`, example: `Basic`. which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory" + + } + + +} +}, + + + "variation parameters" : { + "title": "Variation parameters", + "type": "object", + "description": "No description", + "properties": { + + + "var_vcf_file": { + "type": + "string", + "description": "Type: `string`. path to the VCF file that contains variation data", + "help_text": "Type: `string`. path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1" + + } + + +} +}, + + + "read parameters" : { + "title": "Read Parameters", + "type": "object", + "description": "No description", + "properties": { + + + "read_files_type": { + "type": + "string", + "description": "Type: `string`, example: `Fastx`. format of input read files\n\n- Fastx ", + "help_text": "Type: `string`, example: `Fastx`. format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --read_files_command samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --read_files_command samtools view" + + } + + + , + "read_files_sam_attr_keep": { + "type": + "string", + "description": "Type: List of `string`, example: `All`, multiple_sep: `\";\"`. for --read_files_type SAM SE/PE, which SAM tags to keep in the output BAM, e", + "help_text": "Type: List of `string`, example: `All`, multiple_sep: `\";\"`. for --read_files_type SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... do not keep any tags" + + } + + + , + "read_files_manifest": { + "type": + "string", + "description": "Type: `file`. path to the \"manifest\" file with the names of read files", + "help_text": "Type: `file`. path to the \"manifest\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line." + + } + + + , + "read_files_prefix": { + "type": + "string", + "description": "Type: `string`. prefix for the read files names, i", + "help_text": "Type: `string`. prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn" + + } + + + , + "read_files_command": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. command line to execute for each of the input file", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. command line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." + + } + + + , + "read_map_number": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. number of reads to map from the beginning of the file\n\n-1: map all reads", + "help_text": "Type: `integer`, example: `-1`. number of reads to map from the beginning of the file\n\n-1: map all reads" + + } + + + , + "read_mates_lengths_in": { + "type": + "string", + "description": "Type: `string`, example: `NotEqual`. Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same", + "help_text": "Type: `string`, example: `NotEqual`. Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations." + + } + + + , + "read_name_separator": { + "type": + "string", + "description": "Type: List of `string`, example: `/`, multiple_sep: `\";\"`. character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", + "help_text": "Type: List of `string`, example: `/`, multiple_sep: `\";\"`. character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)" + + } + + + , + "read_quality_score_base": { + "type": + "integer", + "description": "Type: `integer`, example: `33`. number to be subtracted from the ASCII code to get Phred quality score", + "help_text": "Type: `integer`, example: `33`. number to be subtracted from the ASCII code to get Phred quality score" + + } + + +} +}, + + + "read clipping" : { + "title": "Read Clipping", + "type": "object", + "description": "No description", + "properties": { + + + "clip_adapter_type": { + "type": + "string", + "description": "Type: `string`, example: `Hamming`. adapter clipping type\n\n- Hamming ", + "help_text": "Type: `string`, example: `Hamming`. adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded" + + } + + + , + "clip3p_nbases": { + "type": + "string", + "description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 3p of each mate", + "help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates." + + } + + + , + "clip3p_adapter_seq": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. adapter sequences to clip from 3p of each mate", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length" + + } + + + , + "clip3p_adapter_mm_p": { + "type": + "string", + "description": "Type: List of `double`, example: `0.1`, multiple_sep: `\";\"`. max proportion of mismatches for 3p adapter clipping for each mate", + "help_text": "Type: List of `double`, example: `0.1`, multiple_sep: `\";\"`. max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates." + + } + + + , + "clip3p_after_adapter_nbases": { + "type": + "string", + "description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number of bases to clip from 3p of each mate after the adapter clipping", + "help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates." + + } + + + , + "clip5p_nbases": { + "type": + "string", + "description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 5p of each mate", + "help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates." + + } + + +} +}, + + + "limits" : { + "title": "Limits", + "type": "object", + "description": "No description", + "properties": { + + + "limit_genome_generate_ram": { + "type": + "string", + "description": "Type: `long`, example: `31000000000`. maximum available RAM (bytes) for genome generation", + "help_text": "Type: `long`, example: `31000000000`. maximum available RAM (bytes) for genome generation" + + } + + + , + "limit_io_buffer_size": { + "type": + "string", + "description": "Type: List of `long`, example: `30000000;50000000`, multiple_sep: `\";\"`. max available buffers size (bytes) for input/output, per thread", + "help_text": "Type: List of `long`, example: `30000000;50000000`, multiple_sep: `\";\"`. max available buffers size (bytes) for input/output, per thread" + + } + + + , + "limit_out_sam_one_read_bytes": { + "type": + "string", + "description": "Type: `long`, example: `100000`. max size of the SAM record (bytes) for one read", + "help_text": "Type: `long`, example: `100000`. max size of the SAM record (bytes) for one read. Recommended value: \u003e(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + + } + + + , + "limit_out_sj_one_read": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. max number of junctions for one read (including all multi-mappers)", + "help_text": "Type: `integer`, example: `1000`. max number of junctions for one read (including all multi-mappers)" + + } + + + , + "limit_out_sj_collapsed": { + "type": + "integer", + "description": "Type: `integer`, example: `1000000`. max number of collapsed junctions", + "help_text": "Type: `integer`, example: `1000000`. max number of collapsed junctions" + + } + + + , + "limit_bam_sort_ram": { + "type": + "string", + "description": "Type: `long`, example: `0`. maximum available RAM (bytes) for sorting BAM", + "help_text": "Type: `long`, example: `0`. maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 0 value can only be used with --genome_load NoSharedMemory option." + + } + + + , + "limit_sjdb_insert_nsj": { + "type": + "integer", + "description": "Type: `integer`, example: `1000000`. maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", + "help_text": "Type: `integer`, example: `1000000`. maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run" + + } + + + , + "limit_nreads_soft": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. soft limit on the number of reads", + "help_text": "Type: `integer`, example: `-1`. soft limit on the number of reads" + + } + + +} +}, + + + "output: general" : { + "title": "Output: general", + "type": "object", + "description": "No description", + "properties": { + + + "out_tmp_keep": { + "type": + "string", + "description": "Type: `string`. whether to keep the temporary files after STAR runs is finished\n\n- None ", + "help_text": "Type: `string`. whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files" + + } + + + , + "out_std": { + "type": + "string", + "description": "Type: `string`, example: `Log`. which output will be directed to stdout (standard out)\n\n- Log ", + "help_text": "Type: `string`, example: `Log`. which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --out_sam_type BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --out_sam_type BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quant_mode TranscriptomeSAM" + + } + + + , + "out_reads_unmapped": { + "type": + "string", + "description": "Type: `string`. output of unmapped and partially mapped (i", + "help_text": "Type: `string`. output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + + } + + + , + "out_qs_conversion_add": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. add this number to the quality score (e", + "help_text": "Type: `integer`, example: `0`. add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)" + + } + + + , + "out_multimapper_order": { + "type": + "string", + "description": "Type: `string`, example: `Old_2.4`. order of multimapping alignments in the output files\n\n- Old_2", + "help_text": "Type: `string`, example: `Old_2.4`. order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases." + + } + + +} +}, + + + "output: sam and bam" : { + "title": "Output: SAM and BAM", + "type": "object", + "description": "No description", + "properties": { + + + "out_sam_type": { + "type": + "string", + "description": "Type: List of `string`, example: `SAM`, multiple_sep: `\";\"`. type of SAM/BAM output\n\n1st word:\n- BAM ", + "help_text": "Type: List of `string`, example: `SAM`, multiple_sep: `\";\"`. type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limit_bam_sort_ram." + + } + + + , + "out_sam_mode": { + "type": + "string", + "description": "Type: `string`, example: `Full`. mode of SAM output\n\n- None ", + "help_text": "Type: `string`, example: `Full`. mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores" + + } + + + , + "out_sam_strand_field": { + "type": + "string", + "description": "Type: `string`. Cufflinks-like strand field flag\n\n- None ", + "help_text": "Type: `string`. Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out." + + } + + + , + "out_sam_attributes": { + "type": + "string", + "description": "Type: List of `string`, example: `Standard`, multiple_sep: `\";\"`. a string of desired SAM attributes, in the order desired for the output SAM", + "help_text": "Type: List of `string`, example: `Standard`, multiple_sep: `\";\"`. a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, \u003e1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --out_sam_attr_ih_start (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --out_sam_strand_field.\n- MC ... mate\u0027s CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chim_out_type WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5\u0027 and 3\u0027\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --wasp_output_mode SAMtag.\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --out_sam_type BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n- sF ... type of feature overlap and number of features for each alignment\n***Unsupported/undocumented:\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant." + + } + + + , + "out_sam_attr_ih_start": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. start value for the IH attribute", + "help_text": "Type: `integer`, example: `1`. start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie." + + } + + + , + "out_sam_unmapped": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. output of unmapped reads in the SAM format\n\n1st word:\n- None ", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads." + + } + + + , + "out_sam_order": { + "type": + "string", + "description": "Type: `string`, example: `Paired`. type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", + "help_text": "Type: `string`, example: `Paired`. type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files" + + } + + + , + "out_sam_primary_flag": { + "type": + "string", + "description": "Type: `string`, example: `OneBestScore`. which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ", + "help_text": "Type: `string`, example: `OneBestScore`. which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary" + + } + + + , + "out_sam_read_id": { + "type": + "string", + "description": "Type: `string`, example: `Standard`. read ID record type\n\n- Standard ", + "help_text": "Type: `string`, example: `Standard`. read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number (index) in the FASTx file" + + } + + + , + "out_sam_mapq_unique": { + "type": + "integer", + "description": "Type: `integer`, example: `255`. 0 to 255: the MAPQ value for unique mappers", + "help_text": "Type: `integer`, example: `255`. 0 to 255: the MAPQ value for unique mappers" + + } + + + , + "out_sam_flag_or": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. 0 to 65535: sam FLAG will be bitwise OR\u0027d with this value, i", + "help_text": "Type: `integer`, example: `0`. 0 to 65535: sam FLAG will be bitwise OR\u0027d with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise." + + } + + + , + "out_sam_flag_and": { + "type": + "integer", + "description": "Type: `integer`, example: `65535`. 0 to 65535: sam FLAG will be bitwise AND\u0027d with this value, i", + "help_text": "Type: `integer`, example: `65535`. 0 to 65535: sam FLAG will be bitwise AND\u0027d with this value, i.e. FLAG=FLAG \u0026 outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise." + + } + + + , + "out_sam_attr_rg_line": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. SAM/BAM read group line", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. SAM/BAM read group line. The first word contains the read group identifier and must start with \"ID:\", e.g. --out_sam_attr_rg_line ID:xxx CN:yy \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g.\n--out_sam_attr_rg_line ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + + } + + + , + "out_sam_header_hd": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. @HD (header) line of the SAM header", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. @HD (header) line of the SAM header" + + } + + + , + "out_sam_header_pg": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. extra @PG (software) line of the SAM header (in addition to STAR)", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. extra @PG (software) line of the SAM header (in addition to STAR)" + + } + + + , + "out_sam_header_comment_file": { + "type": + "string", + "description": "Type: `string`. path to the file with @CO (comment) lines of the SAM header", + "help_text": "Type: `string`. path to the file with @CO (comment) lines of the SAM header" + + } + + + , + "out_sam_filter": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genome_fasta_files at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genome_fasta_files at the mapping stage." + + } + + + , + "out_sam_mult_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. max number of multiple alignments for a read that will be output to the SAM/BAM files", + "help_text": "Type: `integer`, example: `-1`. max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... all alignments (up to --out_filter_multimap_nmax) will be output" + + } + + + , + "out_sam_tlen": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ", + "help_text": "Type: `integer`, example: `1`. calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends" + + } + + + , + "out_bam_compression": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. -1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", + "help_text": "Type: `integer`, example: `1`. -1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression" + + } + + + , + "out_bam_sorting_thread_n": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. \u003e=0: number of threads for BAM sorting", + "help_text": "Type: `integer`, example: `0`. \u003e=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." + + } + + + , + "out_bam_sorting_bins_n": { + "type": + "integer", + "description": "Type: `integer`, example: `50`. \u003e0: number of genome bins for coordinate-sorting", + "help_text": "Type: `integer`, example: `50`. \u003e0: number of genome bins for coordinate-sorting" + + } + + +} +}, + + + "bam processing" : { + "title": "BAM processing", + "type": "object", + "description": "No description", + "properties": { + + + "bam_remove_duplicates_type": { + "type": + "string", + "description": "Type: `string`. mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ", + "help_text": "Type: `string`. mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers." + + } + + + , + "bam_remove_duplicates_mate2bases_n": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. number of bases from the 5\u0027 of mate 2 to use in collapsing (e", + "help_text": "Type: `integer`, example: `0`. number of bases from the 5\u0027 of mate 2 to use in collapsing (e.g. for RAMPAGE)" + + } + + +} +}, + + + "output wiggle" : { + "title": "Output Wiggle", + "type": "object", + "description": "No description", + "properties": { + + + "out_wig_type": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. type of signal output, e", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires sorted BAM: --out_sam_type BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5\u0027 of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read" + + } + + + , + "out_wig_strand": { + "type": + "string", + "description": "Type: `string`, example: `Stranded`. strandedness of wiggle/bedGraph output\n\n- Stranded ", + "help_text": "Type: `string`, example: `Stranded`. strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands" + + } + + + , + "out_wig_references_prefix": { + "type": + "string", + "description": "Type: `string`. prefix matching reference names to include in the output wiggle file, e", + "help_text": "Type: `string`. prefix matching reference names to include in the output wiggle file, e.g. \"chr\", default \"-\" - include all references" + + } + + + , + "out_wig_norm": { + "type": + "string", + "description": "Type: `string`, example: `RPM`. type of normalization for the signal\n\n- RPM ", + "help_text": "Type: `string`, example: `RPM`. type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \"raw\" counts" + + } + + +} +}, + + + "output filtering" : { + "title": "Output Filtering", + "type": "object", + "description": "No description", + "properties": { + + + "out_filter_type": { + "type": + "string", + "description": "Type: `string`, example: `Normal`. type of filtering\n\n- Normal ", + "help_text": "Type: `string`, example: `Normal`. type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab" + + } + + + , + "out_filter_multimap_score_range": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. the score range below the maximum score for multimapping alignments", + "help_text": "Type: `integer`, example: `1`. the score range below the maximum score for multimapping alignments" + + } + + + , + "out_filter_multimap_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. maximum number of loci the read is allowed to map to", + "help_text": "Type: `integer`, example: `10`. maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \"mapped to too many loci\" in the Log.final.out ." + + } + + + , + "out_filter_mismatch_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. alignment will be output only if it has no more mismatches than this value", + "help_text": "Type: `integer`, example: `10`. alignment will be output only if it has no more mismatches than this value." + + } + + + , + "out_filter_mismatch_nover_lmax": { + "type": + "number", + "description": "Type: `double`, example: `0.3`. alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value", + "help_text": "Type: `double`, example: `0.3`. alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value." + + } + + + , + "out_filter_mismatch_nover_read_lmax": { + "type": + "number", + "description": "Type: `double`, example: `1.0`. alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value", + "help_text": "Type: `double`, example: `1.0`. alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value." + + } + + + , + "out_filter_score_min": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. alignment will be output only if its score is higher than or equal to this value", + "help_text": "Type: `integer`, example: `0`. alignment will be output only if its score is higher than or equal to this value." + + } + + + , + "out_filter_score_min_over_lread": { + "type": + "number", + "description": "Type: `double`, example: `0.66`. same as outFilterScoreMin, but normalized to read length (sum of mates\u0027 lengths for paired-end reads)", + "help_text": "Type: `double`, example: `0.66`. same as outFilterScoreMin, but normalized to read length (sum of mates\u0027 lengths for paired-end reads)" + + } + + + , + "out_filter_match_nmin": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. alignment will be output only if the number of matched bases is higher than or equal to this value", + "help_text": "Type: `integer`, example: `0`. alignment will be output only if the number of matched bases is higher than or equal to this value." + + } + + + , + "out_filter_match_nmin_over_lread": { + "type": + "number", + "description": "Type: `double`, example: `0.66`. sam as outFilterMatchNmin, but normalized to the read length (sum of mates\u0027 lengths for paired-end reads)", + "help_text": "Type: `double`, example: `0.66`. sam as outFilterMatchNmin, but normalized to the read length (sum of mates\u0027 lengths for paired-end reads)." + + } + + + , + "out_filter_intron_motifs": { + "type": + "string", + "description": "Type: `string`. filter alignment using their motifs\n\n- None ", + "help_text": "Type: `string`. filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept." + + } + + + , + "out_filter_intron_strands": { + "type": + "string", + "description": "Type: `string`, example: `RemoveInconsistentStrands`. filter alignments\n\n- RemoveInconsistentStrands ", + "help_text": "Type: `string`, example: `RemoveInconsistentStrands`. filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering" + + } + + +} +}, + + + "output splice junctions (sj.out.tab)" : { + "title": "Output splice junctions (SJ.out.tab)", + "type": "object", + "description": "No description", + "properties": { + + + "out_sj_type": { + "type": + "string", + "description": "Type: `string`, example: `Standard`. type of splice junction output\n\n- Standard ", + "help_text": "Type: `string`, example: `Standard`. type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... no splice junction output" + + } + + +} +}, + + + "output filtering: splice junctions" : { + "title": "Output Filtering: Splice Junctions", + "type": "object", + "description": "No description", + "properties": { + + + "out_sj_filter_reads": { + "type": + "string", + "description": "Type: `string`, example: `All`. which reads to consider for collapsed splice junctions output\n\n- All ", + "help_text": "Type: `string`, example: `All`. which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only" + + } + + + , + "out_sj_filter_overhang_min": { + "type": + "string", + "description": "Type: List of `integer`, example: `30;12;12;12`, multiple_sep: `\";\"`. minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif", + "help_text": "Type: List of `integer`, example: `30;12;12;12`, multiple_sep: `\";\"`. minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions" + + } + + + , + "out_sj_filter_count_unique_min": { + "type": + "string", + "description": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif", + "help_text": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions" + + } + + + , + "out_sj_filter_count_total_min": { + "type": + "string", + "description": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif", + "help_text": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions" + + } + + + , + "out_sj_filter_dist_to_other_sj_min": { + "type": + "string", + "description": "Type: List of `integer`, example: `10;0;5;10`, multiple_sep: `\";\"`. minimum allowed distance to other junctions\u0027 donor/acceptor\n\ndoes not apply to annotated junctions", + "help_text": "Type: List of `integer`, example: `10;0;5;10`, multiple_sep: `\";\"`. minimum allowed distance to other junctions\u0027 donor/acceptor\n\ndoes not apply to annotated junctions" + + } + + + , + "out_sj_filter_intron_max_vs_read_n": { + "type": + "string", + "description": "Type: List of `integer`, example: `50000;100000;200000`, multiple_sep: `\";\"`. maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni", + "help_text": "Type: List of `integer`, example: `50000;100000;200000`, multiple_sep: `\";\"`. maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps \u003c=50000b, by 2 reads: \u003c=100000b, by 3 reads: \u003c=200000. by \u003e=4 reads any gap \u003c=alignIntronMax\ndoes not apply to annotated junctions" + + } + + +} +}, + + + "scoring" : { + "title": "Scoring", + "type": "object", + "description": "No description", + "properties": { + + + "score_gap": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. splice junction penalty (independent on intron motif)", + "help_text": "Type: `integer`, example: `0`. splice junction penalty (independent on intron motif)" + + } + + + , + "score_gap_noncan": { + "type": + "integer", + "description": "Type: `integer`, example: `-8`. non-canonical junction penalty (in addition to scoreGap)", + "help_text": "Type: `integer`, example: `-8`. non-canonical junction penalty (in addition to scoreGap)" + + } + + + , + "score_gap_gcag": { + "type": + "integer", + "description": "Type: `integer`, example: `-4`. GC/AG and CT/GC junction penalty (in addition to scoreGap)", + "help_text": "Type: `integer`, example: `-4`. GC/AG and CT/GC junction penalty (in addition to scoreGap)" + + } + + + , + "score_gap_atac": { + "type": + "integer", + "description": "Type: `integer`, example: `-8`. AT/AC and GT/AT junction penalty (in addition to scoreGap)", + "help_text": "Type: `integer`, example: `-8`. AT/AC and GT/AT junction penalty (in addition to scoreGap)" + + } + + + , + "score_genomic_length_log2scale": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", + "help_text": "Type: `integer`, example: `0`. extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" + + } + + + , + "score_del_open": { + "type": + "integer", + "description": "Type: `integer`, example: `-2`. deletion open penalty", + "help_text": "Type: `integer`, example: `-2`. deletion open penalty" + + } + + + , + "score_del_base": { + "type": + "integer", + "description": "Type: `integer`, example: `-2`. deletion extension penalty per base (in addition to scoreDelOpen)", + "help_text": "Type: `integer`, example: `-2`. deletion extension penalty per base (in addition to scoreDelOpen)" + + } + + + , + "score_ins_open": { + "type": + "integer", + "description": "Type: `integer`, example: `-2`. insertion open penalty", + "help_text": "Type: `integer`, example: `-2`. insertion open penalty" + + } + + + , + "score_ins_base": { + "type": + "integer", + "description": "Type: `integer`, example: `-2`. insertion extension penalty per base (in addition to scoreInsOpen)", + "help_text": "Type: `integer`, example: `-2`. insertion extension penalty per base (in addition to scoreInsOpen)" + + } + + + , + "score_stitch_sj_shift": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. maximum score reduction while searching for SJ boundaries in the stitching step", + "help_text": "Type: `integer`, example: `1`. maximum score reduction while searching for SJ boundaries in the stitching step" + + } + + +} +}, + + + "alignments and seeding" : { + "title": "Alignments and Seeding", + "type": "object", + "description": "No description", + "properties": { + + + "seed_search_start_lmax": { + "type": + "integer", + "description": "Type: `integer`, example: `50`. defines the search start point through the read - the read is split into pieces no longer than this value", + "help_text": "Type: `integer`, example: `50`. defines the search start point through the read - the read is split into pieces no longer than this value" + + } + + + , + "seed_search_start_lmax_over_lread": { + "type": + "number", + "description": "Type: `double`, example: `1.0`. seedSearchStartLmax normalized to read length (sum of mates\u0027 lengths for paired-end reads)", + "help_text": "Type: `double`, example: `1.0`. seedSearchStartLmax normalized to read length (sum of mates\u0027 lengths for paired-end reads)" + + } + + + , + "seed_search_lmax": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. defines the maximum length of the seeds, if =0 seed length is not limited", + "help_text": "Type: `integer`, example: `0`. defines the maximum length of the seeds, if =0 seed length is not limited" + + } + + + , + "seed_multimap_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `10000`. only pieces that map fewer than this value are utilized in the stitching procedure", + "help_text": "Type: `integer`, example: `10000`. only pieces that map fewer than this value are utilized in the stitching procedure" + + } + + + , + "seed_per_read_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. max number of seeds per read", + "help_text": "Type: `integer`, example: `1000`. max number of seeds per read" + + } + + + , + "seed_per_window_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `50`. max number of seeds per window", + "help_text": "Type: `integer`, example: `50`. max number of seeds per window" + + } + + + , + "seed_none_loci_per_window": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. max number of one seed loci per window", + "help_text": "Type: `integer`, example: `10`. max number of one seed loci per window" + + } + + + , + "seed_split_min": { + "type": + "integer", + "description": "Type: `integer`, example: `12`. min length of the seed sequences split by Ns or mate gap", + "help_text": "Type: `integer`, example: `12`. min length of the seed sequences split by Ns or mate gap" + + } + + + , + "seed_map_min": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. min length of seeds to be mapped", + "help_text": "Type: `integer`, example: `5`. min length of seeds to be mapped" + + } + + + , + "align_intron_min": { + "type": + "integer", + "description": "Type: `integer`, example: `21`. minimum intron size, genomic gap is considered intron if its length\u003e=alignIntronMin, otherwise it is considered Deletion", + "help_text": "Type: `integer`, example: `21`. minimum intron size, genomic gap is considered intron if its length\u003e=alignIntronMin, otherwise it is considered Deletion" + + } + + + , + "align_intron_max": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", + "help_text": "Type: `integer`, example: `0`. maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins" + + } + + + , + "align_mates_gap_max": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", + "help_text": "Type: `integer`, example: `0`. maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins" + + } + + + , + "align_sj_overhang_min": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. minimum overhang (i", + "help_text": "Type: `integer`, example: `5`. minimum overhang (i.e. block size) for spliced alignments" + + } + + + , + "align_sj_stitch_mismatch_nmax": { + "type": + "string", + "description": "Type: List of `integer`, example: `0;-1;0;0`, multiple_sep: `\";\"`. maximum number of mismatches for stitching of the splice junctions (-1: no limit)", + "help_text": "Type: List of `integer`, example: `0;-1;0;0`, multiple_sep: `\";\"`. maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + + } + + + , + "align_sjdb_overhang_min": { + "type": + "integer", + "description": "Type: `integer`, example: `3`. minimum overhang (i", + "help_text": "Type: `integer`, example: `3`. minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments" + + } + + + , + "align_spliced_mate_map_lmin": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. minimum mapped length for a read mate that is spliced", + "help_text": "Type: `integer`, example: `0`. minimum mapped length for a read mate that is spliced" + + } + + + , + "align_spliced_mate_map_lmin_over_lmate": { + "type": + "number", + "description": "Type: `double`, example: `0.66`. alignSplicedMateMapLmin normalized to mate length", + "help_text": "Type: `double`, example: `0.66`. alignSplicedMateMapLmin normalized to mate length" + + } + + + , + "align_windows_per_read_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `10000`. max number of windows per read", + "help_text": "Type: `integer`, example: `10000`. max number of windows per read" + + } + + + , + "align_transcripts_per_window_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `100`. max number of transcripts per window", + "help_text": "Type: `integer`, example: `100`. max number of transcripts per window" + + } + + + , + "align_transcripts_per_read_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `10000`. max number of different alignments per read to consider", + "help_text": "Type: `integer`, example: `10000`. max number of different alignments per read to consider" + + } + + + , + "align_ends_type": { + "type": + "string", + "description": "Type: `string`, example: `Local`. type of read ends alignment\n\n- Local ", + "help_text": "Type: `string`, example: `Local`. type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment" + + } + + + , + "align_ends_protrude": { + "type": + "string", + "description": "Type: `string`, example: `0 ConcordantPair`. allow protrusion of alignment ends, i", + "help_text": "Type: `string`, example: `0 ConcordantPair`. allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs" + + } + + + , + "align_soft_clip_at_reference_ends": { + "type": + "string", + "description": "Type: `string`, example: `Yes`. allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ", + "help_text": "Type: `string`, example: `Yes`. allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility with Cufflinks" + + } + + + , + "align_insertion_flush": { + "type": + "string", + "description": "Type: `string`. how to flush ambiguous insertion positions\n\n- None ", + "help_text": "Type: `string`. how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right" + + } + + +} +}, + + + "paired-end reads" : { + "title": "Paired-End reads", + "type": "object", + "description": "No description", + "properties": { + + + "pe_overlap_nbases_min": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. minimum number of overlapping bases to trigger mates merging and realignment", + "help_text": "Type: `integer`, example: `0`. minimum number of overlapping bases to trigger mates merging and realignment. Specify \u003e0 value to switch on the \"merginf of overlapping mates\" algorithm." + + } + + + , + "pe_overlap_mm_p": { + "type": + "number", + "description": "Type: `double`, example: `0.01`. maximum proportion of mismatched bases in the overlap area", + "help_text": "Type: `double`, example: `0.01`. maximum proportion of mismatched bases in the overlap area" + + } + + +} +}, + + + "windows, anchors, binning" : { + "title": "Windows, Anchors, Binning", + "type": "object", + "description": "No description", + "properties": { + + + "win_anchor_multimap_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `50`. max number of loci anchors are allowed to map to", + "help_text": "Type: `integer`, example: `50`. max number of loci anchors are allowed to map to" + + } + + + , + "win_bin_nbits": { + "type": + "integer", + "description": "Type: `integer`, example: `16`. =log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins", + "help_text": "Type: `integer`, example: `16`. =log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins." + + } + + + , + "win_anchor_dist_nbins": { + "type": + "integer", + "description": "Type: `integer`, example: `9`. max number of bins between two anchors that allows aggregation of anchors into one window", + "help_text": "Type: `integer`, example: `9`. max number of bins between two anchors that allows aggregation of anchors into one window" + + } + + + , + "win_flank_nbins": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", + "help_text": "Type: `integer`, example: `4`. log2(winFlank), where win Flank is the size of the left and right flanking regions for each window" + + } + + + , + "win_read_coverage_relative_min": { + "type": + "number", + "description": "Type: `double`, example: `0.5`. minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only", + "help_text": "Type: `double`, example: `0.5`. minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only." + + } + + + , + "win_read_coverage_bases_min": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. minimum number of bases covered by the seeds in a window , for STARlong algorithm only", + "help_text": "Type: `integer`, example: `0`. minimum number of bases covered by the seeds in a window , for STARlong algorithm only." + + } + + +} +}, + + + "chimeric alignments" : { + "title": "Chimeric Alignments", + "type": "object", + "description": "No description", + "properties": { + + + "chim_out_type": { + "type": + "string", + "description": "Type: List of `string`, example: `Junctions`, multiple_sep: `\";\"`. type of chimeric output\n\n- Junctions ", + "help_text": "Type: List of `string`, example: `Junctions`, multiple_sep: `\";\"`. type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... soft-clipping in the CIGAR for supplemental chimeric alignments" + + } + + + , + "chim_segment_min": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. minimum length of chimeric segment length, if ==0, no chimeric output", + "help_text": "Type: `integer`, example: `0`. minimum length of chimeric segment length, if ==0, no chimeric output" + + } + + + , + "chim_score_min": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. minimum total (summed) score of the chimeric segments", + "help_text": "Type: `integer`, example: `0`. minimum total (summed) score of the chimeric segments" + + } + + + , + "chim_score_drop_max": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", + "help_text": "Type: `integer`, example: `20`. max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length" + + } + + + , + "chim_score_separation": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. minimum difference (separation) between the best chimeric score and the next one", + "help_text": "Type: `integer`, example: `10`. minimum difference (separation) between the best chimeric score and the next one" + + } + + + , + "chim_score_junction_non_gtag": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. penalty for a non-GT/AG chimeric junction", + "help_text": "Type: `integer`, example: `-1`. penalty for a non-GT/AG chimeric junction" + + } + + + , + "chim_junction_overhang_min": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. minimum overhang for a chimeric junction", + "help_text": "Type: `integer`, example: `20`. minimum overhang for a chimeric junction" + + } + + + , + "chim_segment_read_gap_max": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. maximum gap in the read sequence between chimeric segments", + "help_text": "Type: `integer`, example: `0`. maximum gap in the read sequence between chimeric segments" + + } + + + , + "chim_filter": { + "type": + "string", + "description": "Type: List of `string`, example: `banGenomicN`, multiple_sep: `\";\"`. different filters for chimeric alignments\n\n- None ", + "help_text": "Type: List of `string`, example: `banGenomicN`, multiple_sep: `\";\"`. different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction" + + } + + + , + "chim_main_segment_mult_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. maximum number of multi-alignments for the main chimeric segment", + "help_text": "Type: `integer`, example: `10`. maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments." + + } + + + , + "chim_multimap_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. maximum number of chimeric multi-alignments\n\n- 0 ", + "help_text": "Type: `integer`, example: `0`. maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments" + + } + + + , + "chim_multimap_score_range": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. the score range for multi-mapping chimeras below the best chimeric score", + "help_text": "Type: `integer`, example: `1`. the score range for multi-mapping chimeras below the best chimeric score. Only works with --chim_multimap_nmax \u003e 1" + + } + + + , + "chim_nonchim_score_drop_min": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", + "help_text": "Type: `integer`, example: `20`. to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value" + + } + + + , + "chim_out_junction_format": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. formatting type for the Chimeric", + "help_text": "Type: `integer`, example: `0`. formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping" + + } + + +} +}, + + + "quantification of annotations" : { + "title": "Quantification of Annotations", + "type": "object", + "description": "No description", + "properties": { + + + "quant_mode": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. types of quantification requested\n\n- - ", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene" + + } + + + , + "quant_transcriptome_bam_compression": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. -2 to 10 transcriptome BAM compression level\n\n- -2 ", + "help_text": "Type: `integer`, example: `1`. -2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression" + + } + + + , + "quant_transcriptome_sam_output": { + "type": + "string", + "description": "Type: `string`, example: `BanSingleEnd_BanIndels_ExtendSoftclip`. alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip ", + "help_text": "Type: `string`, example: `BanSingleEnd_BanIndels_ExtendSoftclip`. alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip ... prohibit indels and single-end alignments, extend softclips - compatible with RSEM\n- BanSingleEnd ... prohibit single-end alignments, allow indels and softclips\n- BanSingleEnd_ExtendSoftclip ... prohibit single-end alignments, extend softclips, allow indels" + + } + + +} +}, + + + "2-pass mapping" : { + "title": "2-pass Mapping", + "type": "object", + "description": "No description", + "properties": { + + + "twopass_mode": { + "type": + "string", + "description": "Type: `string`. 2-pass mapping mode", + "help_text": "Type: `string`. 2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly" + + } + + + , + "twopass1reads_n": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. number of reads to process for the 1st step", + "help_text": "Type: `integer`, example: `-1`. number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step." + + } + + +} +}, + + + "wasp parameters" : { + "title": "WASP parameters", + "type": "object", + "description": "No description", + "properties": { + + + "wasp_output_mode": { + "type": + "string", + "description": "Type: `string`. WASP allele-specific output type", + "help_text": "Type: `string`. WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad \u0026 Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" + + } + + +} +}, + + + "starsolo (single cell rna-seq) parameters" : { + "title": "STARsolo (single cell RNA-seq) parameters", + "type": "object", + "description": "No description", + "properties": { + + + "solo_type": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. type of single-cell RNA-seq\n\n- CB_UMI_Simple ", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --out_sam_type BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)" + + } + + + , + "solo_cb_type": { + "type": + "string", + "description": "Type: `string`, example: `Sequence`. cell barcode type\n\nSequence: cell barcode is a sequence (standard option)\nString: cell barcode is an arbitrary string", + "help_text": "Type: `string`, example: `Sequence`. cell barcode type\n\nSequence: cell barcode is a sequence (standard option)\nString: cell barcode is an arbitrary string" + + } + + + , + "solo_cb_whitelist": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. file(s) with whitelist(s) of cell barcodes", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. file(s) with whitelist(s) of cell barcodes. Only --solo_type CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed" + + } + + + , + "solo_cb_start": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. cell barcode start base", + "help_text": "Type: `integer`, example: `1`. cell barcode start base" + + } + + + , + "solo_cb_len": { + "type": + "integer", + "description": "Type: `integer`, example: `16`. cell barcode length", + "help_text": "Type: `integer`, example: `16`. cell barcode length" + + } + + + , + "solo_umi_start": { + "type": + "integer", + "description": "Type: `integer`, example: `17`. UMI start base", + "help_text": "Type: `integer`, example: `17`. UMI start base" + + } + + + , + "solo_umi_len": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. UMI length", + "help_text": "Type: `integer`, example: `10`. UMI length" + + } + + + , + "solo_barcode_read_length": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. length of the barcode read\n\n- 1 ", + "help_text": "Type: `integer`, example: `1`. length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check" + + } + + + , + "solo_barcode_mate": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ", + "help_text": "Type: `integer`, example: `0`. identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2" + + } + + + , + "solo_cb_position": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. position of Cell Barcode(s) on the barcode read", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --solo_type CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position 0_0_2_-1 3_1_3_8" + + } + + + , + "solo_umi_position": { + "type": + "string", + "description": "Type: `string`. position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat", + "help_text": "Type: `string`. position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position 3_9_3_14" + + } + + + , + "solo_adapter_sequence": { + "type": + "string", + "description": "Type: `string`. adapter sequence to anchor barcodes", + "help_text": "Type: `string`. adapter sequence to anchor barcodes. Only one adapter sequence is allowed." + + } + + + , + "solo_adapter_mismatches_nmax": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. maximum number of mismatches allowed in adapter sequence", + "help_text": "Type: `integer`, example: `1`. maximum number of mismatches allowed in adapter sequence." + + } + + + , + "solo_cb_match_wl_type": { + "type": + "string", + "description": "Type: `string`, example: `1MM_multi`. matching the Cell Barcodes to the WhiteList\n\n- Exact ", + "help_text": "Type: `string`, example: `1MM_multi`. matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger \u003e= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --solo_type CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + + } + + + , + "solo_input_sam_attr_barcode_seq": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order)", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_seq CR UR .\nThis parameter is required when running STARsolo with input from SAM." + + } + + + , + "solo_input_sam_attr_barcode_qual": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order)", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_qual CY UY .\nIf this parameter is \u0027-\u0027 (default), the quality \u0027H\u0027 will be assigned to all bases." + + } + + + , + "solo_strand": { + "type": + "string", + "description": "Type: `string`, example: `Forward`. strandedness of the solo libraries:\n\n- Unstranded ", + "help_text": "Type: `string`, example: `Forward`. strandedness of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule" + + } + + + , + "solo_features": { + "type": + "string", + "description": "Type: List of `string`, example: `Gene`, multiple_sep: `\";\"`. genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ", + "help_text": "Type: List of `string`, example: `Gene`, multiple_sep: `\";\"`. genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... full gene (pre-mRNA): count all reads overlapping genes\u0027 exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes\u0027 exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes\u0027 exons and introns: prioritize \u003e50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction." + + } + + + , + "solo_multi_mappers": { + "type": + "string", + "description": "Type: List of `string`, example: `Unique`, multiple_sep: `\";\"`. counting method for reads mapping to multiple genes\n\n- Unique ", + "help_text": "Type: List of `string`, example: `Unique`, multiple_sep: `\";\"`. counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm" + + } + + + , + "solo_umi_dedup": { + "type": + "string", + "description": "Type: List of `string`, example: `1MM_All`, multiple_sep: `\";\"`. type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ", + "help_text": "Type: List of `string`, example: `1MM_All`, multiple_sep: `\";\"`. type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI collapsing." + + } + + + , + "solo_umi_filtering": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger \u003e 3.0.0 .\nOnly works with --solo_umi_dedup 1MM_CR" + + } + + + , + "solo_out_file_names": { + "type": + "string", + "description": "Type: List of `string`, example: `Solo.out/;features.tsv;barcodes.tsv;matrix.mtx`, multiple_sep: `\";\"`. file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", + "help_text": "Type: List of `string`, example: `Solo.out/;features.tsv;barcodes.tsv;matrix.mtx`, multiple_sep: `\";\"`. file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix" + + } + + + , + "solo_cell_filter": { + "type": + "string", + "description": "Type: List of `string`, example: `CellRanger2.2;3000;0.99;10`, multiple_sep: `\";\"`. cell filtering type and parameters\n\n- None ", + "help_text": "Type: List of `string`, example: `CellRanger2.2;3000;0.99;10`, multiple_sep: `\";\"`. cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000" + + } + + + , + "solo_out_format_features_gene_field3": { + "type": + "string", + "description": "Type: List of `string`, example: `Gene Expression`, multiple_sep: `\";\"`. field 3 in the Gene features", + "help_text": "Type: List of `string`, example: `Gene Expression`, multiple_sep: `\";\"`. field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is output." + + } + + + , + "solo_cell_read_stats": { + "type": + "string", + "description": "Type: `string`. Output reads statistics for each CB\n\n- Standard ", + "help_text": "Type: `string`. Output reads statistics for each CB\n\n- Standard ... standard output" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/run parameters" + }, + + { + "$ref": "#/definitions/genome parameters" + }, + + { + "$ref": "#/definitions/splice junctions database" + }, + + { + "$ref": "#/definitions/variation parameters" + }, + + { + "$ref": "#/definitions/read parameters" + }, + + { + "$ref": "#/definitions/read clipping" + }, + + { + "$ref": "#/definitions/limits" + }, + + { + "$ref": "#/definitions/output: general" + }, + + { + "$ref": "#/definitions/output: sam and bam" + }, + + { + "$ref": "#/definitions/bam processing" + }, + + { + "$ref": "#/definitions/output wiggle" + }, + + { + "$ref": "#/definitions/output filtering" + }, + + { + "$ref": "#/definitions/output splice junctions (sj.out.tab)" + }, + + { + "$ref": "#/definitions/output filtering: splice junctions" + }, + + { + "$ref": "#/definitions/scoring" + }, + + { + "$ref": "#/definitions/alignments and seeding" + }, + + { + "$ref": "#/definitions/paired-end reads" + }, + + { + "$ref": "#/definitions/windows, anchors, binning" + }, + + { + "$ref": "#/definitions/chimeric alignments" + }, + + { + "$ref": "#/definitions/quantification of annotations" + }, + + { + "$ref": "#/definitions/2-pass mapping" + }, + + { + "$ref": "#/definitions/wasp parameters" + }, + + { + "$ref": "#/definitions/starsolo (single cell rna-seq) parameters" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/.config.vsh.yaml new file mode 100644 index 0000000..355f86e --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/.config.vsh.yaml @@ -0,0 +1,374 @@ +name: "star_genome_generate" +namespace: "star" +version: "v0.3.1" +authors: +- name: "Sai Nirmayi Yasa" + roles: + - "author" + - "maintainer" + info: + links: + email: "nirmayi@data-intuitive.com" + github: "sainirmayi" + linkedin: "sai-nirmayi-yasa" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Junior Bioinformatics Researcher" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--genome_fasta_files" + description: "Path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--sjdb_gtf_file" + description: "Path to the GTF file with annotations" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sjdb_overhang" + description: "Length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_chr_prefix" + description: "Prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSMEBL annotations with UCSC genomes)" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_feature_exon" + description: "Feature type in GTF file to be used as exons for building transcripts" + info: null + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_transcript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: null + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_gene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: null + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_gene_name" + description: "GTF attribute name for parent gene name" + info: null + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--sjdb_gtf_tag_exon_parent_gene_type" + description: "GTF attribute name for parent gene type" + info: null + example: + - "gene_type" + - "gene_biotype" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "long" + name: "--limit_genome_generate_ram" + description: "Maximum available RAM (bytes) for genome generation" + info: null + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--genome_sa_index_nbases" + description: "Length (bases) of the SA pre-indexing string. Typically between\ + \ 10 and 15. Longer strings will use much more memory, but allow faster searches.\ + \ For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2\ + \ - 1)." + info: null + example: + - 14 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--genome_chr_bin_nbits" + description: "Defined as log2(chrBin), where chrBin is the size of the bins for\ + \ genome storage. Each chromosome will occupy an integer number of bins. For\ + \ a genome with large number of contigs, it is recommended to scale this parameter\ + \ as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)])." + info: null + example: + - 18 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--genome_sa_sparse_d" + description: "Suffux array sparsity, i.e. distance between indices. Use bigger\ + \ numbers to decrease needed RAM at the cost of mapping speed reduction." + info: null + example: + - 1 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--genome_suffix_length_max" + description: "Maximum length of the suffixes, has to be longer than read length.\ + \ Use -1 for infinite length." + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--genome_transform_type" + description: "Type of genome transformation\n None ... no transformation\n\ + \ Haploid ... replace reference alleles with alternative alleles from VCF\ + \ file (e.g. consensus allele)\n Diploid ... create two haplotypes for each\ + \ chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing\ + \ (e.g. personal genome)\n" + info: null + example: + - "None" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_transform_vcf" + description: "path to VCF file for genome transformation" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--index" + description: "STAR index directory." + info: null + default: + - "STAR_index" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Create index for STAR\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "genome" +- "index" +- "align" +license: "MIT" +references: + doi: + - "10.1093/bioinformatics/bts635" +links: + repository: "https://github.com/alexdobin/STAR" + documentation: "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + env: + - "STAR_VERSION 2.7.11b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd" + - type: "docker" + run: + - "STAR --version | sed 's#\\(.*\\)#star: \"\\1\"#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/star/star_genome_generate/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/star/star_genome_generate" + executable: "target/nextflow/star/star_genome_generate/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/main.nf new file mode 100644 index 0000000..6cc940b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/main.nf @@ -0,0 +1,4037 @@ +// star_genome_generate v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Sai Nirmayi Yasa (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "star_genome_generate", + "namespace" : "star", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Sai Nirmayi Yasa", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "nirmayi@data-intuitive.com", + "github" : "sainirmayi", + "linkedin" : "sai-nirmayi-yasa" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Junior Bioinformatics Researcher" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--genome_fasta_files", + "description" : "Path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sjdb_gtf_file", + "description" : "Path to the GTF file with annotations", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sjdb_overhang", + "description" : "Length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_chr_prefix", + "description" : "Prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL annotations with UCSC genomes)", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_feature_exon", + "description" : "Feature type in GTF file to be used as exons for building transcripts", + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_transcript", + "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", + "example" : [ + "transcript_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_gene", + "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_gene_name", + "description" : "GTF attribute name for parent gene name", + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdb_gtf_tag_exon_parent_gene_type", + "description" : "GTF attribute name for parent gene type", + "example" : [ + "gene_type", + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--limit_genome_generate_ram", + "description" : "Maximum available RAM (bytes) for genome generation", + "example" : [ + 31000000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--genome_sa_index_nbases", + "description" : "Length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2 - 1).", + "example" : [ + 14 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--genome_chr_bin_nbits", + "description" : "Defined as log2(chrBin), where chrBin is the size of the bins for genome storage. Each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)]).", + "example" : [ + 18 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--genome_sa_sparse_d", + "description" : "Suffux array sparsity, i.e. distance between indices. Use bigger numbers to decrease needed RAM at the cost of mapping speed reduction.", + "example" : [ + 1 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--genome_suffix_length_max", + "description" : "Maximum length of the suffixes, has to be longer than read length. Use -1 for infinite length.", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--genome_transform_type", + "description" : "Type of genome transformation\n None ... no transformation\n Haploid ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele)\n Diploid ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome)\n", + "example" : [ + "None" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_transform_vcf", + "description" : "path to VCF file for genome transformation", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--index", + "description" : "STAR index directory.", + "default" : [ + "STAR_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Create index for STAR\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "genome", + "index", + "align" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts635" + ] + }, + "links" : { + "repository" : "https://github.com/alexdobin/STAR", + "documentation" : "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" + ], + "env" : [ + "STAR_VERSION 2.7.11b", + "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd" + ] + }, + { + "type" : "docker", + "run" : [ + "STAR --version | sed 's#\\\\(.*\\\\)#star: \\"\\\\1\\"#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/star/star_genome_generate/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/star/star_genome_generate", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -e + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME_FASTA_FILES+x} ]; then echo "${VIASH_PAR_GENOME_FASTA_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_fasta_files='&'#" ; else echo "# par_genome_fasta_files="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_GTF_FILE+x} ]; then echo "${VIASH_PAR_SJDB_GTF_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_gtf_file='&'#" ; else echo "# par_sjdb_gtf_file="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_OVERHANG+x} ]; then echo "${VIASH_PAR_SJDB_OVERHANG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_overhang='&'#" ; else echo "# par_sjdb_overhang="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_GTF_CHR_PREFIX+x} ]; then echo "${VIASH_PAR_SJDB_GTF_CHR_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_gtf_chr_prefix='&'#" ; else echo "# par_sjdb_gtf_chr_prefix="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_GTF_FEATURE_EXON+x} ]; then echo "${VIASH_PAR_SJDB_GTF_FEATURE_EXON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_gtf_feature_exon='&'#" ; else echo "# par_sjdb_gtf_feature_exon="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_gtf_tag_exon_parent_transcript='&'#" ; else echo "# par_sjdb_gtf_tag_exon_parent_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE+x} ]; then echo "${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_gtf_tag_exon_parent_gene='&'#" ; else echo "# par_sjdb_gtf_tag_exon_parent_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_NAME+x} ]; then echo "${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_gtf_tag_exon_parent_gene_name='&'#" ; else echo "# par_sjdb_gtf_tag_exon_parent_gene_name="; fi ) +$( if [ ! -z ${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_TYPE+x} ]; then echo "${VIASH_PAR_SJDB_GTF_TAG_EXON_PARENT_GENE_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sjdb_gtf_tag_exon_parent_gene_type='&'#" ; else echo "# par_sjdb_gtf_tag_exon_parent_gene_type="; fi ) +$( if [ ! -z ${VIASH_PAR_LIMIT_GENOME_GENERATE_RAM+x} ]; then echo "${VIASH_PAR_LIMIT_GENOME_GENERATE_RAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_limit_genome_generate_ram='&'#" ; else echo "# par_limit_genome_generate_ram="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME_SA_INDEX_NBASES+x} ]; then echo "${VIASH_PAR_GENOME_SA_INDEX_NBASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_sa_index_nbases='&'#" ; else echo "# par_genome_sa_index_nbases="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME_CHR_BIN_NBITS+x} ]; then echo "${VIASH_PAR_GENOME_CHR_BIN_NBITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_chr_bin_nbits='&'#" ; else echo "# par_genome_chr_bin_nbits="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME_SA_SPARSE_D+x} ]; then echo "${VIASH_PAR_GENOME_SA_SPARSE_D}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_sa_sparse_d='&'#" ; else echo "# par_genome_sa_sparse_d="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME_SUFFIX_LENGTH_MAX+x} ]; then echo "${VIASH_PAR_GENOME_SUFFIX_LENGTH_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_suffix_length_max='&'#" ; else echo "# par_genome_suffix_length_max="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME_TRANSFORM_TYPE+x} ]; then echo "${VIASH_PAR_GENOME_TRANSFORM_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_transform_type='&'#" ; else echo "# par_genome_transform_type="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME_TRANSFORM_VCF+x} ]; then echo "${VIASH_PAR_GENOME_TRANSFORM_VCF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_transform_vcf='&'#" ; else echo "# par_genome_transform_vcf="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +mkdir -p \\$par_index + +STAR \\\\ + --runMode genomeGenerate \\\\ + --genomeDir \\$par_index \\\\ + --genomeFastaFiles \\$par_genome_fasta_files \\\\ + \\${meta_cpus:+--runThreadN "\\${meta_cpus}"} \\\\ + \\${par_sjdb_gtf_file:+--sjdbGTFfile "\\${par_sjdb_gtf_file}"} \\\\ + \\${par_sjdbOverhang:+--sjdbOverhang "\\${par_sjdbOverhang}"} \\\\ + \\${par_genome_sa_index_nbases:+--genomeSAindexNbases "\\${par_genome_sa_index_nbases}"} \\\\ + \\${par_sjdb_gtf_chr_prefix:+--sjdbGTFchrPrefix "\\${par_sjdb_gtf_chr_prefix}"} \\\\ + \\${par_sjdb_gtf_feature_exon:+--sjdbGTFfeatureExon "\\${par_sjdb_gtf_feature_exon}"} \\\\ + \\${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtagExonParentTranscript "\\${par_sjdb_gtf_tag_exon_parent_transcript}"} \\\\ + \\${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtagExonParentGene "\\${par_sjdb_gtf_tag_exon_parent_gene}"} \\\\ + \\${sjdb_gtf_tag_exon_parent_gene_name:+--sjdbGTFtagExonParentGeneName "\\${sjdb_gtf_tag_exon_parent_gene_name}"} \\\\ + \\${sjdb_gtf_tag_exon_parent_gene_type:+--sjdbGTFtagExonParentGeneType "\\${sjdb_gtf_tag_exon_parent_gene_type}"} \\\\ + \\${par_limit_genome_generate_ram:+--limitGenomeGenerateRAM "\\${par_limit_genome_generate_ram}"} \\\\ + \\${par_genome_chr_bin_nbits:+--genomeChrBinNbits "\\${par_genome_chr_bin_nbits}"} \\\\ + \\${par_genome_sa_sparse_d:+--genomeSAsparseD "\\${par_genome_sa_sparse_d}"} \\\\ + \\${par_genome_suffix_length_max:+--genomeSuffixLengthMax "\\${par_genome_suffix_length_max}"} \\\\ + \\${par_genome_transform_type:+--genomeTransformType "\\${par_genome_transform_type}"} \\\\ + \\${par_genome_transform_vcf:+--genomeTransformVCF "\\${par_genome_transform_vCF}"} \\\\ +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/star/star_genome_generate", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow.config new file mode 100644 index 0000000..8aa875e --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'star/star_genome_generate' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Create index for STAR\n' + author = 'Sai Nirmayi Yasa' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow_schema.json new file mode 100644 index 0000000..27f6172 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/nextflow_schema.json @@ -0,0 +1,245 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "star_genome_generate", +"description": "Create index for STAR\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "genome_fasta_files": { + "type": + "string", + "description": "Type: List of `file`, required, multiple_sep: `\";\"`. Path(s) to the fasta files with the genome sequences, separated by spaces", + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n" + + } + + + , + "sjdb_gtf_file": { + "type": + "string", + "description": "Type: `file`. Path to the GTF file with annotations", + "help_text": "Type: `file`. Path to the GTF file with annotations" + + } + + + , + "sjdb_overhang": { + "type": + "integer", + "description": "Type: `integer`, example: `100`. Length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "help_text": "Type: `integer`, example: `100`. Length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)" + + } + + + , + "sjdb_gtf_chr_prefix": { + "type": + "string", + "description": "Type: `string`. Prefix for chromosome names in a GTF file (e", + "help_text": "Type: `string`. Prefix for chromosome names in a GTF file (e.g. \u0027chr\u0027 for using ENSMEBL annotations with UCSC genomes)" + + } + + + , + "sjdb_gtf_feature_exon": { + "type": + "string", + "description": "Type: `string`, example: `exon`. Feature type in GTF file to be used as exons for building transcripts", + "help_text": "Type: `string`, example: `exon`. Feature type in GTF file to be used as exons for building transcripts" + + } + + + , + "sjdb_gtf_tag_exon_parent_transcript": { + "type": + "string", + "description": "Type: `string`, example: `transcript_id`. GTF attribute name for parent transcript ID (default \"transcript_id\" works for GTF files)", + "help_text": "Type: `string`, example: `transcript_id`. GTF attribute name for parent transcript ID (default \"transcript_id\" works for GTF files)" + + } + + + , + "sjdb_gtf_tag_exon_parent_gene": { + "type": + "string", + "description": "Type: `string`, example: `gene_id`. GTF attribute name for parent gene ID (default \"gene_id\" works for GTF files)", + "help_text": "Type: `string`, example: `gene_id`. GTF attribute name for parent gene ID (default \"gene_id\" works for GTF files)" + + } + + + , + "sjdb_gtf_tag_exon_parent_gene_name": { + "type": + "string", + "description": "Type: List of `string`, example: `gene_name`, multiple_sep: `\";\"`. GTF attribute name for parent gene name", + "help_text": "Type: List of `string`, example: `gene_name`, multiple_sep: `\";\"`. GTF attribute name for parent gene name" + + } + + + , + "sjdb_gtf_tag_exon_parent_gene_type": { + "type": + "string", + "description": "Type: List of `string`, example: `gene_type;gene_biotype`, multiple_sep: `\";\"`. GTF attribute name for parent gene type", + "help_text": "Type: List of `string`, example: `gene_type;gene_biotype`, multiple_sep: `\";\"`. GTF attribute name for parent gene type" + + } + + + , + "limit_genome_generate_ram": { + "type": + "string", + "description": "Type: `long`, example: `31000000000`. Maximum available RAM (bytes) for genome generation", + "help_text": "Type: `long`, example: `31000000000`. Maximum available RAM (bytes) for genome generation" + + } + + + , + "genome_sa_index_nbases": { + "type": + "integer", + "description": "Type: `integer`, example: `14`. Length (bases) of the SA pre-indexing string", + "help_text": "Type: `integer`, example: `14`. Length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2 - 1)." + + } + + + , + "genome_chr_bin_nbits": { + "type": + "integer", + "description": "Type: `integer`, example: `18`. Defined as log2(chrBin), where chrBin is the size of the bins for genome storage", + "help_text": "Type: `integer`, example: `18`. Defined as log2(chrBin), where chrBin is the size of the bins for genome storage. Each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)])." + + } + + + , + "genome_sa_sparse_d": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Suffux array sparsity, i", + "help_text": "Type: `integer`, example: `1`. Suffux array sparsity, i.e. distance between indices. Use bigger numbers to decrease needed RAM at the cost of mapping speed reduction." + + } + + + , + "genome_suffix_length_max": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. Maximum length of the suffixes, has to be longer than read length", + "help_text": "Type: `integer`, example: `-1`. Maximum length of the suffixes, has to be longer than read length. Use -1 for infinite length." + + } + + + , + "genome_transform_type": { + "type": + "string", + "description": "Type: `string`, example: `None`. Type of genome transformation\n None ", + "help_text": "Type: `string`, example: `None`. Type of genome transformation\n None ... no transformation\n Haploid ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele)\n Diploid ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome)\n" + + } + + + , + "genome_transform_vcf": { + "type": + "string", + "description": "Type: `file`. path to VCF file for genome transformation", + "help_text": "Type: `file`. path to VCF file for genome transformation" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "string", + "description": "Type: `file`, required, default: `STAR_index`. STAR index directory", + "help_text": "Type: `file`, required, default: `STAR_index`. STAR index directory." + , + "default":"STAR_index" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/.config.vsh.yaml new file mode 100644 index 0000000..bc55589 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/.config.vsh.yaml @@ -0,0 +1,811 @@ +name: "trimgalore" +version: "v0.3.1" +authors: +- name: "Sai Nirmayi Yasa" + roles: + - "author" + - "maintainer" + info: + links: + email: "nirmayi@data-intuitive.com" + github: "sainirmayi" + linkedin: "sai-nirmayi-yasa" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Junior Bioinformatics Researcher" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Input files. Note that paired-end files need to be supplied in a\ + \ pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz" + info: null + example: + - "sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Trimming options" + arguments: + - type: "integer" + name: "--quality" + alternatives: + - "-q" + description: "Trim low-quality ends (below the specified Phred score) from reads\ + \ in addition to adapter removal. For RRBS samples, quality trimming will be\ + \ performed first, and adapter trimming is carried in a second round. Other\ + \ files are quality and adapter trimmed in a single pass. The algorithm is the\ + \ same as the one used by BWA (Subtract INT from all qualities; compute partial\ + \ sums from all indices to the end of the sequence; cut sequence at the index\ + \ at which the sum is minimal)." + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--phred33" + description: "Instructs Cutadapt to use ASCII+33 quality scores as Phred scores\ + \ (Sanger/Illumina 1.9+ encoding) for quality trimming." + info: null + direction: "input" + - type: "boolean_true" + name: "--phred64" + description: "Instructs Cutadapt to use ASCII+64 quality scores as Phred scores\ + \ (Illumina 1.5 encoding) for quality trimming." + info: null + direction: "input" + - type: "boolean_true" + name: "--fastqc" + description: "Run FastQC in the default mode on the FastQ file once trimming is\ + \ complete." + info: null + direction: "input" + - type: "string" + name: "--fastqc_args" + description: "Passes extra arguments (excluding files) to FastQC. If more than\ + \ one argument is to be passed to FastQC they must be in the form \"arg1 arg2\ + \ ...\". Passing extra arguments will automatically invoke FastQC, so --fastqc\ + \ does not have to be specified separately." + info: null + example: + - "--nogroup --noextract" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_contaminants" + description: "Specifies a non-default file which contains the list of contaminants\ + \ for FastQC to screen overrepresented sequences against. The file must contain\ + \ sets of named contaminants in the form name[tab]sequence. Lines prefixed with\ + \ a hash will be ignored." + info: null + example: + - "contaminants.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_adapters" + description: "Specifies a non-default file which contains the list of adapter\ + \ sequences which which FasstQC will explicity search against the library. The\ + \ file must contain sets of named adapters in the form name[tab]sequence. Lines\ + \ prefixed with a hash will be ignored." + info: null + example: + - "adapters.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_limits" + description: "Specifies a non-default file which contains a set of criteria which\ + \ FastQC will use to determine the warn/error limits for the various modules.\ + \ This file can also be used to selectively remove some modules from the output\ + \ all together. The format needs to mirror the default limits.txt file found\ + \ in the Configuration folder." + info: null + example: + - "limits.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--adapter" + alternatives: + - "-a" + description: "Adapter sequence to be trimmed. If not specified explicitly, Trim\ + \ Galore will try to auto-detect whether the Illumina universal, Nextera transposase\ + \ or Illumina small RNA adapter sequence was used. A single base may also be\ + \ given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. \nAt a special request,\ + \ multiple adapters can also be specified like so: \n -a \" AGCTCCCG -a TTTCATTATAT\ + \ -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\", \nor\ + \ so:\n -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"\ + \nPotentially in conjucntion with the parameter \"-n 3\" to trim all adapters.\ + \ \n" + info: null + example: + - "AGCTCCCG" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--adapter2" + alternatives: + - "-a2" + description: "Optional adapter sequence to be trimmed off read 2 of paired-end\ + \ files. This option requires '--paired' to be specified as well. If the libraries\ + \ to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5'\ + \ adapter automatically (GATCGTCGGACT). A single base may also be given as e.g.\ + \ -a2 A{10}, to be expanded to -a2 AAAAAAAAAA." + info: null + example: + - "AGCTCCCG" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--illumina" + description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\ + \ universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of\ + \ adapter sequence." + info: null + direction: "input" + - type: "boolean_true" + name: "--stranded_illumina" + description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\ + \ stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default\ + \ auto-detection of adapter sequence." + info: null + direction: "input" + - type: "boolean_true" + name: "--nextera" + description: "Adapter sequence to be trimmed is the first 12bp of the Nextera\ + \ adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence." + info: null + direction: "input" + - type: "boolean_true" + name: "--small_rna" + description: "Adapter sequence to be trimmed is the first 12bp of the Illumina\ + \ Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection\ + \ of adapter sequence. Selecting to trim smallRNA adapters will also lower the\ + \ --length value to 18bp. If the smallRNA libraries are paired-end then a automatically\ + \ (GATCGTCGGACT) unless -a 2 had been defined explicitly." + info: null + direction: "input" + - type: "integer" + name: "--consider_already_trimmed" + description: "During adapter auto-detection, the limit set by this argument allows\ + \ the user to set a threshold up to which the file is considered already adapter-trimmed.\ + \ If no adapter sequence exceeds this threshold, no additional adapter trimming\ + \ will be performed (technically, the adapter is set to '-a X'). Quality trimming\ + \ is still performed as usual." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_length" + description: "Discard reads that are longer than the specified value after trimming.\ + \ This is only advised for smallRNA sequencing to remove non-small RNA sequences." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--stringency" + description: "Overlap with adapter sequence required to trim a sequence. Defaults\ + \ to a very stringent setting of 1, i.e. even a single bp of overlapping sequence\ + \ will be trimmed off from the 3' end of any read." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--error_rate" + alternatives: + - "-e" + description: "Maximum allowed error rate (no. of errors divided by the length\ + \ of the matching region)" + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--gzip" + description: "Compress the output file with GZIP. If the input files are GZIP-compressed\ + \ the output files will automatically be GZIP compressed as well. As of v0.2.8\ + \ the compression will take place on the fly." + info: null + direction: "input" + - type: "boolean_true" + name: "--dont_gzip" + description: "Output files won't be compressed with GZIP. This option overrides\ + \ --gzip." + info: null + direction: "input" + - type: "integer" + name: "--length" + description: "Discard reads that became shorter than the specified length because\ + \ of either quality or adapter trimming. A value of '0' effectively disables\ + \ this behaviour. For paired-end files, both reads of a read-pair need to be\ + \ longer than the specified length to be printed out to validated paired-end\ + \ files. If only one read became too short there is the possibility of keeping\ + \ such unpaired single-end reads using the --retain_unpaired option." + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_n" + description: "The total number of Ns a read may contain before it will be removed\ + \ altogether.In a paired-end setting, either read exceeding this limit will\ + \ result in the entire pair being removed from the trimmed output files. If\ + \ COUNT is a number between 0 and 1, it is interpreted as a fraction of the\ + \ read length." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--trim_n" + description: "Removes Ns from either side of the read. This option does currently\ + \ not work in RRBS mode." + info: null + direction: "input" + - type: "boolean_true" + name: "--no_report_file" + description: "If specified no report file will be generated." + info: null + direction: "input" + - type: "boolean_true" + name: "--suppress_warn" + description: "If specified any output to STDOUT or STDERR will be suppressed." + info: null + direction: "input" + - type: "integer" + name: "--clip_R1" + description: "Instructs TrimGalore to remove given number of bp from the 5' end\ + \ of read 1 (or single-end reads). This may be useful if the qualities were\ + \ very poor, or if there is some sort of unwanted bias at the 5' end." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--clip_R2" + description: "Instructs TrimGalore to remove given number bp from the 5' end of\ + \ read 2 (paired-end reads only). This may be useful if the qualities were very\ + \ poor, or if there is some sort of unwanted bias at the 5' end. For paired-end\ + \ BS-Seq, it is recommended to remove the first few bp because the end-repair\ + \ reaction may introduce a bias towards low methylation." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--three_prime_clip_R1" + description: "Instructs Trim Galore to remove spacified number of bp from the\ + \ 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has\ + \ been performed. This may remove some bias from the 3' end that is not directly\ + \ related to adapter sequence or basecall quality." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--three_prime_clip_R2" + description: "Instructs Trim Galore to remove bp from the 3' end of read\ + \ 2 AFTER adapter/quality trimming has been performed. This may remove some\ + \ unwanted bias from the 3' end that is not directly related to adapter sequence\ + \ or basecall quality." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--nextseq" + description: "This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt,\ + \ which will set a quality cutoff (that is normally given with -q instead),\ + \ but qualities of G bases are ignored. This trimming is in common for the NextSeq-\ + \ and NovaSeq-platforms, where basecalls without any signal are called as high-quality\ + \ G bases. This is mutually exlusive with '-q INT'." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--basename" + description: "Use specified name (PREFERRED_NAME) as the basename for output files,\ + \ instead of deriving the filenames from the input files. Single-end data would\ + \ be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz)\ + \ and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works\ + \ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\ + \ longer lists." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Specific trimming options without adapter/quality trimming" + arguments: + - type: "integer" + name: "--hardtrim5" + description: "Instead of performing adapter-/quality trimming, this option will\ + \ simply hard-trim sequences to bp at the 5'-end. Once hard-trimming of\ + \ files is complete, Trim Galore will exit. Hard-trimmed output files will end\ + \ in ._5prime.fq(.gz)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--hardtrim3" + description: "Instead of performing adapter-/quality trimming, this option will\ + \ simply hard-trim sequences to bp at the 3'-end. Once hard-trimming of\ + \ files is complete, Trim Galore will exit. Hard-trimmed output files will end\ + \ in ._3prime.fq(.gz)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--clock" + description: "In this mode, reads are trimmed in a specific way that is currently\ + \ used for the Mouse Epigenetic Clock." + info: null + direction: "input" + - type: "boolean_true" + name: "--polyA" + description: "This is a new, still experimental, trimming mode to identify and\ + \ remove poly-A tails from sequences. When --polyA is selected, Trim Galore\ + \ attempts to identify from the first supplied sample whether sequences contain\ + \ more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines\ + \ if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA\ + \ or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary\ + \ base from the start of the reads. The auto-detection uses a default of A{20}\ + \ for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These\ + \ values may be changed manually using the options -a and -a2. In addition to\ + \ trimming the sequences, white spaces are replaced with _ and it records in\ + \ the read ID how many bases were trimmed so it can later be used to identify\ + \ PolyA trimmed sequences. This is currently done by writing tags to both the\ + \ start (\"32:A:\") and end (\"_PolyA:32\") of the reads. The poly-A trimming\ + \ mode expects that sequences were both adapter and quality before looking\ + \ for Poly-A tails, and it is the user's responsibility to carry out an initial\ + \ round of trimming." + info: null + direction: "input" + - type: "boolean_true" + name: "--implicon" + description: "This is a special mode of operation for paired-end data, such as\ + \ required for the IMPLICON method, where a UMI sequence is getting transferred\ + \ from the start of Read 2 to the readID of both reads. Following this, Trim\ + \ Galore will exit. In it's current implementation, the UMI carrying reads come\ + \ in the following format\n Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\ + \ 3'\n Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'\nWhere UUUUUUUU is\ + \ a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual\ + \ fragment to be sequenced. The UMI of Read 2 (R2) is written into the read\ + \ ID of both reads and removed from the actual sequence.\n" + info: null + direction: "input" +- name: "RRBS-specific options" + arguments: + - type: "boolean_true" + name: "--rrbs" + description: "Specifies that the input file was an MspI digested RRBS sample (recognition\ + \ site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed\ + \ will have a further 2 bp removed from their 3' end. Sequences which were merely\ + \ trimmed because of poor quality will not be shortened further. Read 2 of paired-end\ + \ libraries will in addition have the first 2 bp removed from the 5' end (by\ + \ setting '--clip_r2 2'). This is to avoid using artificial methylation calls\ + \ from the filled-in cytosine positions close to the 3' MspI site in sequenced\ + \ fragments. This option is not recommended for users of the Tecan Ovation RRBS\ + \ Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)." + info: null + direction: "input" + - type: "boolean_true" + name: "--non_directional" + description: "Selecting this option for non-directional RRBS libraries will screen\ + \ quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and,\ + \ if found, removes the first two basepairs. Like with the option '--rrbs' this\ + \ avoids using cytosine positions that were filled-in during the end-repair\ + \ step. '--non_directional' requires '--rrbs' to be specified as well. Note\ + \ that this option does not set '--clip_r2 2' in paired-end mode." + info: null + direction: "input" + - type: "boolean_true" + name: "--keep" + description: "Keep the quality trimmed intermediate file." + info: null + direction: "input" +- name: "Paired-end specific options" + arguments: + - type: "boolean_true" + name: "--paired" + description: "This option performs length trimming of quality/adapter/RRBS trimmed\ + \ reads for paired-end files. To pass the validation test, both sequences of\ + \ a sequence pair are required to have a certain minimum length which is governed\ + \ by the option --length (see above). If only one read passes this length threshold\ + \ the other read can be rescued (see option --retain_unpaired). Using this option\ + \ lets you discard too short read pairs without disturbing the sequence-by-sequence\ + \ order of FastQ files which is required by many aligners. Trim Galore expects\ + \ paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq\ + \ SRR2_1.fq.gz SRR2_2.fq.gz ... ." + info: null + direction: "input" + - type: "boolean_true" + name: "--retain_unpaired" + description: "If only one of the two paired-end reads became too short, the longer\ + \ read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output\ + \ files. The length cutoff for unpaired single-end reads is governed by the\ + \ parameters -r1/--length_1 and -r2/--length_2." + info: null + direction: "input" + - type: "integer" + name: "--length_1" + alternatives: + - "-r1" + description: "Unpaired single-end read length cutoff needed for read 1 to be written\ + \ to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode." + info: null + example: + - 35 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--length_2" + alternatives: + - "-r2" + description: "Unpaired single-end read length cutoff needed for read 2 to be written\ + \ to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode." + info: null + example: + - 35 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_dir" + alternatives: + - "-o" + description: "If specified all output will be written to this directory instead\ + \ of the current directory." + info: null + default: + - "trimmed_output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimmed_r1" + description: "Output file for read 1. Only works when 1 file (single-end) or 2\ + \ files (paired-end) are specified, but not for longer lists." + info: null + example: + - "read_1.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimmed_r2" + description: "Output file for read 2. Only works when 1 file (single-end) or 2\ + \ files (paired-end) are specified, but not for longer lists." + info: null + example: + - "read_2.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimming_report_r1" + description: "Trimming report for read 1. Only works when 1 file (single-end)\ + \ or 2 files (paired-end) are specified, but not for longer lists." + info: null + example: + - "read_1.trimming_report.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimming_report_r2" + description: "Trimming report for read 1. Only works when 1 file (single-end)\ + \ or 2 files (paired-end) are specified, but not for longer lists." + info: null + example: + - "read_2.trimming_report.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimmed_fastqc_html_1" + description: "FastQC report for trimmed (single-end) reads (or read 1 for paired-end).\ + \ Only works when 1 file (single-end) or 2 files (paired-end) are specified,\ + \ but not for longer lists." + info: null + example: + - "read_1.fastqc.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimmed_fastqc_html_2" + description: "FastQC report for trimmed reads (read2 for paired-end). Only works\ + \ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\ + \ longer lists." + info: null + example: + - "read_2.fastqc.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimmed_fastqc_zip_1" + description: "FastQC results for trimmed (single-end) reads (or read 1 for paired-end).\ + \ Only works when 1 file (single-end) or 2 files (paired-end) are specified,\ + \ but not for longer lists." + info: null + example: + - "read_1.fastqc.zip" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trimmed_fastqc_zip_2" + description: "FastQC results for trimmed reads (read2 for paired-end). Only works\ + \ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\ + \ longer lists." + info: null + example: + - "read_2.fastqc.zip" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unpaired_r1" + description: "Output file for unpired read 1. Only works when 1 file (single-end)\ + \ or 2 files (paired-end) are specified, but not for longer lists." + info: null + example: + - "unpaired_read_1.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unpaired_r2" + description: "Output file for unpaired read 2. Only works when 1 file (single-end)\ + \ or 2 files (paired-end) are specified, but not for longer lists." + info: null + example: + - "unpaired_read_2.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "A wrapper tool around Cutadapt and FastQC to consistently apply quality\ + \ and adapter trimming to FastQ files. \n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "trimming" +- "adapters" +license: "GPL-3.0" +references: + doi: + - "10.5281/zenodo.7598955" +links: + repository: "https://github.com/FelixKrueger/TrimGalore" + homepage: "https://github.com/FelixKrueger/TrimGalore" + documentation: "https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/trim-galore:0.6.10--hdfd78af_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "echo \"TrimGalore: `trim_galore --version | sed -n 's/.*version\\s\\+\\([0-9]\\\ + +\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p'`\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/trimgalore/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/trimgalore" + executable: "target/nextflow/trimgalore/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/main.nf new file mode 100644 index 0000000..f3af325 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/main.nf @@ -0,0 +1,4539 @@ +// trimgalore v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Sai Nirmayi Yasa (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "trimgalore", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Sai Nirmayi Yasa", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "nirmayi@data-intuitive.com", + "github" : "sainirmayi", + "linkedin" : "sai-nirmayi-yasa" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Junior Bioinformatics Researcher" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input files. Note that paired-end files need to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz", + "example" : [ + "sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Trimming options", + "arguments" : [ + { + "type" : "integer", + "name" : "--quality", + "alternatives" : [ + "-q" + ], + "description" : "Trim low-quality ends (below the specified Phred score) from reads in addition to adapter removal. For RRBS samples, quality trimming will be performed first, and adapter trimming is carried in a second round. Other files are quality and adapter trimmed in a single pass. The algorithm is the same as the one used by BWA (Subtract INT from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal).", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--phred33", + "description" : "Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--phred64", + "description" : "Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fastqc", + "description" : "Run FastQC in the default mode on the FastQ file once trimming is complete.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--fastqc_args", + "description" : "Passes extra arguments (excluding files) to FastQC. If more than one argument is to be passed to FastQC they must be in the form \\"arg1 arg2 ...\\". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately.", + "example" : [ + "--nogroup --noextract" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_contaminants", + "description" : "Specifies a non-default file which contains the list of contaminants for FastQC to screen overrepresented sequences against. The file must contain sets of named contaminants in the form name[tab]sequence. Lines prefixed with a hash will be ignored.", + "example" : [ + "contaminants.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_adapters", + "description" : "Specifies a non-default file which contains the list of adapter sequences which which FasstQC will explicity search against the library. The file must contain sets of named adapters in the form name[tab]sequence. Lines prefixed with a hash will be ignored.", + "example" : [ + "adapters.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_limits", + "description" : "Specifies a non-default file which contains a set of criteria which FastQC will use to determine the warn/error limits for the various modules. This file can also be used to selectively remove some modules from the output all together. The format needs to mirror the default limits.txt file found in the Configuration folder.", + "example" : [ + "limits.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--adapter", + "alternatives" : [ + "-a" + ], + "description" : "Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. \nAt a special request, multiple adapters can also be specified like so: \n -a \\" AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT\\" -a2 \\" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\\", \nor so:\n -a \\"file:../multiple_adapters.fa\\" -a2 \\"file:../different_adapters.fa\\"\nPotentially in conjucntion with the parameter \\"-n 3\\" to trim all adapters. \n", + "example" : [ + "AGCTCCCG" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--adapter2", + "alternatives" : [ + "-a2" + ], + "description" : "Optional adapter sequence to be trimmed off read 2 of paired-end files. This option requires '--paired' to be specified as well. If the libraries to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT). A single base may also be given as e.g. -a2 A{10}, to be expanded to -a2 AAAAAAAAAA.", + "example" : [ + "AGCTCCCG" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--illumina", + "description" : "Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--stranded_illumina", + "description" : "Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default auto-detection of adapter sequence.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nextera", + "description" : "Adapter sequence to be trimmed is the first 12bp of the Nextera adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--small_rna", + "description" : "Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then a automatically (GATCGTCGGACT) unless -a 2 had been defined explicitly.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--consider_already_trimmed", + "description" : "During adapter auto-detection, the limit set by this argument allows the user to set a threshold up to which the file is considered already adapter-trimmed. If no adapter sequence exceeds this threshold, no additional adapter trimming will be performed (technically, the adapter is set to '-a X'). Quality trimming is still performed as usual.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_length", + "description" : "Discard reads that are longer than the specified value after trimming. This is only advised for smallRNA sequencing to remove non-small RNA sequences.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--stringency", + "description" : "Overlap with adapter sequence required to trim a sequence. Defaults to a very stringent setting of 1, i.e. even a single bp of overlapping sequence will be trimmed off from the 3' end of any read.", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--error_rate", + "alternatives" : [ + "-e" + ], + "description" : "Maximum allowed error rate (no. of errors divided by the length of the matching region)", + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--gzip", + "description" : "Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--dont_gzip", + "description" : "Output files won't be compressed with GZIP. This option overrides --gzip.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--length", + "description" : "Discard reads that became shorter than the specified length because of either quality or adapter trimming. A value of '0' effectively disables this behaviour. For paired-end files, both reads of a read-pair need to be longer than the specified length to be printed out to validated paired-end files. If only one read became too short there is the possibility of keeping such unpaired single-end reads using the --retain_unpaired option.", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_n", + "description" : "The total number of Ns a read may contain before it will be removed altogether.In a paired-end setting, either read exceeding this limit will result in the entire pair being removed from the trimmed output files. If COUNT is a number between 0 and 1, it is interpreted as a fraction of the read length.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--trim_n", + "description" : "Removes Ns from either side of the read. This option does currently not work in RRBS mode.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_report_file", + "description" : "If specified no report file will be generated.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--suppress_warn", + "description" : "If specified any output to STDOUT or STDERR will be suppressed.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--clip_R1", + "description" : "Instructs TrimGalore to remove given number of bp from the 5' end of read 1 (or single-end reads). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--clip_R2", + "description" : "Instructs TrimGalore to remove given number bp from the 5' end of read 2 (paired-end reads only). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove the first few bp because the end-repair reaction may introduce a bias towards low methylation.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--three_prime_clip_R1", + "description" : "Instructs Trim Galore to remove spacified number of bp from the 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. This may remove some bias from the 3' end that is not directly related to adapter sequence or basecall quality.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--three_prime_clip_R2", + "description" : "Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--nextseq", + "description" : "This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. This is mutually exlusive with '-q INT'.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--basename", + "description" : "Use specified name (PREFERRED_NAME) as the basename for output files, instead of deriving the filenames from the input files. Single-end data would be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Specific trimming options without adapter/quality trimming", + "arguments" : [ + { + "type" : "integer", + "name" : "--hardtrim5", + "description" : "Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to bp at the 5'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in ._5prime.fq(.gz).", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--hardtrim3", + "description" : "Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to bp at the 3'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in ._3prime.fq(.gz).", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--clock", + "description" : "In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--polyA", + "description" : "This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start (\\"32:A:\\") and end (\\"_PolyA:32\\") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality before looking for Poly-A tails, and it is the user's responsibility to carry out an initial round of trimming.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--implicon", + "description" : "This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In it's current implementation, the UMI carrying reads come in the following format\n Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3'\n Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'\nWhere UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.\n", + "direction" : "input" + } + ] + }, + { + "name" : "RRBS-specific options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--rrbs", + "description" : "Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3' end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5' end (by setting '--clip_r2 2'). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3' MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below).", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--non_directional", + "description" : "Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs. Like with the option '--rrbs' this avoids using cytosine positions that were filled-in during the end-repair step. '--non_directional' requires '--rrbs' to be specified as well. Note that this option does not set '--clip_r2 2' in paired-end mode.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--keep", + "description" : "Keep the quality trimmed intermediate file.", + "direction" : "input" + } + ] + }, + { + "name" : "Paired-end specific options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--retain_unpaired", + "description" : "If only one of the two paired-end reads became too short, the longer read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output files. The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--length_1", + "alternatives" : [ + "-r1" + ], + "description" : "Unpaired single-end read length cutoff needed for read 1 to be written to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.", + "example" : [ + 35 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--length_2", + "alternatives" : [ + "-r2" + ], + "description" : "Unpaired single-end read length cutoff needed for read 2 to be written to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.", + "example" : [ + 35 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_dir", + "alternatives" : [ + "-o" + ], + "description" : "If specified all output will be written to this directory instead of the current directory.", + "default" : [ + "trimmed_output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimmed_r1", + "description" : "Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_1.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimmed_r2", + "description" : "Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimming_report_r1", + "description" : "Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_1.trimming_report.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimming_report_r2", + "description" : "Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_2.trimming_report.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimmed_fastqc_html_1", + "description" : "FastQC report for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_1.fastqc.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimmed_fastqc_html_2", + "description" : "FastQC report for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_2.fastqc.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimmed_fastqc_zip_1", + "description" : "FastQC results for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_1.fastqc.zip" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trimmed_fastqc_zip_2", + "description" : "FastQC results for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "read_2.fastqc.zip" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unpaired_r1", + "description" : "Output file for unpired read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "unpaired_read_1.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unpaired_r2", + "description" : "Output file for unpaired read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", + "example" : [ + "unpaired_read_2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files. \n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "trimming", + "adapters" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.5281/zenodo.7598955" + ] + }, + "links" : { + "repository" : "https://github.com/FelixKrueger/TrimGalore", + "homepage" : "https://github.com/FelixKrueger/TrimGalore", + "documentation" : "https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/trim-galore:0.6.10--hdfd78af_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "echo \\"TrimGalore: `trim_galore --version | sed -n 's/.*version\\\\s\\\\+\\\\([0-9]\\\\+\\\\.[0-9]\\\\+\\\\.[0-9]\\\\+\\\\).*/\\\\1/p'`\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/trimgalore/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/trimgalore", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY+x} ]; then echo "${VIASH_PAR_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality='&'#" ; else echo "# par_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_PHRED33+x} ]; then echo "${VIASH_PAR_PHRED33}" | sed "s#'#'\\"'\\"'#g;s#.*#par_phred33='&'#" ; else echo "# par_phred33="; fi ) +$( if [ ! -z ${VIASH_PAR_PHRED64+x} ]; then echo "${VIASH_PAR_PHRED64}" | sed "s#'#'\\"'\\"'#g;s#.*#par_phred64='&'#" ; else echo "# par_phred64="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC+x} ]; then echo "${VIASH_PAR_FASTQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc='&'#" ; else echo "# par_fastqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_ARGS+x} ]; then echo "${VIASH_PAR_FASTQC_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_args='&'#" ; else echo "# par_fastqc_args="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_CONTAMINANTS+x} ]; then echo "${VIASH_PAR_FASTQC_CONTAMINANTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_contaminants='&'#" ; else echo "# par_fastqc_contaminants="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_ADAPTERS+x} ]; then echo "${VIASH_PAR_FASTQC_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_adapters='&'#" ; else echo "# par_fastqc_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_LIMITS+x} ]; then echo "${VIASH_PAR_FASTQC_LIMITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_limits='&'#" ; else echo "# par_fastqc_limits="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER+x} ]; then echo "${VIASH_PAR_ADAPTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter='&'#" ; else echo "# par_adapter="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER2+x} ]; then echo "${VIASH_PAR_ADAPTER2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter2='&'#" ; else echo "# par_adapter2="; fi ) +$( if [ ! -z ${VIASH_PAR_ILLUMINA+x} ]; then echo "${VIASH_PAR_ILLUMINA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_illumina='&'#" ; else echo "# par_illumina="; fi ) +$( if [ ! -z ${VIASH_PAR_STRANDED_ILLUMINA+x} ]; then echo "${VIASH_PAR_STRANDED_ILLUMINA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stranded_illumina='&'#" ; else echo "# par_stranded_illumina="; fi ) +$( if [ ! -z ${VIASH_PAR_NEXTERA+x} ]; then echo "${VIASH_PAR_NEXTERA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nextera='&'#" ; else echo "# par_nextera="; fi ) +$( if [ ! -z ${VIASH_PAR_SMALL_RNA+x} ]; then echo "${VIASH_PAR_SMALL_RNA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_small_rna='&'#" ; else echo "# par_small_rna="; fi ) +$( if [ ! -z ${VIASH_PAR_CONSIDER_ALREADY_TRIMMED+x} ]; then echo "${VIASH_PAR_CONSIDER_ALREADY_TRIMMED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_consider_already_trimmed='&'#" ; else echo "# par_consider_already_trimmed="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_LENGTH+x} ]; then echo "${VIASH_PAR_MAX_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_length='&'#" ; else echo "# par_max_length="; fi ) +$( if [ ! -z ${VIASH_PAR_STRINGENCY+x} ]; then echo "${VIASH_PAR_STRINGENCY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stringency='&'#" ; else echo "# par_stringency="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR_RATE+x} ]; then echo "${VIASH_PAR_ERROR_RATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error_rate='&'#" ; else echo "# par_error_rate="; fi ) +$( if [ ! -z ${VIASH_PAR_GZIP+x} ]; then echo "${VIASH_PAR_GZIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gzip='&'#" ; else echo "# par_gzip="; fi ) +$( if [ ! -z ${VIASH_PAR_DONT_GZIP+x} ]; then echo "${VIASH_PAR_DONT_GZIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dont_gzip='&'#" ; else echo "# par_dont_gzip="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTH+x} ]; then echo "${VIASH_PAR_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_length='&'#" ; else echo "# par_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_N+x} ]; then echo "${VIASH_PAR_MAX_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_n='&'#" ; else echo "# par_max_n="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_N+x} ]; then echo "${VIASH_PAR_TRIM_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_n='&'#" ; else echo "# par_trim_n="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_REPORT_FILE+x} ]; then echo "${VIASH_PAR_NO_REPORT_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_report_file='&'#" ; else echo "# par_no_report_file="; fi ) +$( if [ ! -z ${VIASH_PAR_SUPPRESS_WARN+x} ]; then echo "${VIASH_PAR_SUPPRESS_WARN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_suppress_warn='&'#" ; else echo "# par_suppress_warn="; fi ) +$( if [ ! -z ${VIASH_PAR_CLIP_R1+x} ]; then echo "${VIASH_PAR_CLIP_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_clip_R1='&'#" ; else echo "# par_clip_R1="; fi ) +$( if [ ! -z ${VIASH_PAR_CLIP_R2+x} ]; then echo "${VIASH_PAR_CLIP_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_clip_R2='&'#" ; else echo "# par_clip_R2="; fi ) +$( if [ ! -z ${VIASH_PAR_THREE_PRIME_CLIP_R1+x} ]; then echo "${VIASH_PAR_THREE_PRIME_CLIP_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime_clip_R1='&'#" ; else echo "# par_three_prime_clip_R1="; fi ) +$( if [ ! -z ${VIASH_PAR_THREE_PRIME_CLIP_R2+x} ]; then echo "${VIASH_PAR_THREE_PRIME_CLIP_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime_clip_R2='&'#" ; else echo "# par_three_prime_clip_R2="; fi ) +$( if [ ! -z ${VIASH_PAR_NEXTSEQ+x} ]; then echo "${VIASH_PAR_NEXTSEQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nextseq='&'#" ; else echo "# par_nextseq="; fi ) +$( if [ ! -z ${VIASH_PAR_BASENAME+x} ]; then echo "${VIASH_PAR_BASENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_basename='&'#" ; else echo "# par_basename="; fi ) +$( if [ ! -z ${VIASH_PAR_HARDTRIM5+x} ]; then echo "${VIASH_PAR_HARDTRIM5}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hardtrim5='&'#" ; else echo "# par_hardtrim5="; fi ) +$( if [ ! -z ${VIASH_PAR_HARDTRIM3+x} ]; then echo "${VIASH_PAR_HARDTRIM3}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hardtrim3='&'#" ; else echo "# par_hardtrim3="; fi ) +$( if [ ! -z ${VIASH_PAR_CLOCK+x} ]; then echo "${VIASH_PAR_CLOCK}" | sed "s#'#'\\"'\\"'#g;s#.*#par_clock='&'#" ; else echo "# par_clock="; fi ) +$( if [ ! -z ${VIASH_PAR_POLYA+x} ]; then echo "${VIASH_PAR_POLYA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_polyA='&'#" ; else echo "# par_polyA="; fi ) +$( if [ ! -z ${VIASH_PAR_IMPLICON+x} ]; then echo "${VIASH_PAR_IMPLICON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_implicon='&'#" ; else echo "# par_implicon="; fi ) +$( if [ ! -z ${VIASH_PAR_RRBS+x} ]; then echo "${VIASH_PAR_RRBS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rrbs='&'#" ; else echo "# par_rrbs="; fi ) +$( if [ ! -z ${VIASH_PAR_NON_DIRECTIONAL+x} ]; then echo "${VIASH_PAR_NON_DIRECTIONAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_non_directional='&'#" ; else echo "# par_non_directional="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP+x} ]; then echo "${VIASH_PAR_KEEP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep='&'#" ; else echo "# par_keep="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_RETAIN_UNPAIRED+x} ]; then echo "${VIASH_PAR_RETAIN_UNPAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_retain_unpaired='&'#" ; else echo "# par_retain_unpaired="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTH_1+x} ]; then echo "${VIASH_PAR_LENGTH_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_length_1='&'#" ; else echo "# par_length_1="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTH_2+x} ]; then echo "${VIASH_PAR_LENGTH_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_length_2='&'#" ; else echo "# par_length_2="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMED_R1+x} ]; then echo "${VIASH_PAR_TRIMMED_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimmed_r1='&'#" ; else echo "# par_trimmed_r1="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMED_R2+x} ]; then echo "${VIASH_PAR_TRIMMED_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimmed_r2='&'#" ; else echo "# par_trimmed_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMING_REPORT_R1+x} ]; then echo "${VIASH_PAR_TRIMMING_REPORT_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimming_report_r1='&'#" ; else echo "# par_trimming_report_r1="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMING_REPORT_R2+x} ]; then echo "${VIASH_PAR_TRIMMING_REPORT_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimming_report_r2='&'#" ; else echo "# par_trimming_report_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMED_FASTQC_HTML_1+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_HTML_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimmed_fastqc_html_1='&'#" ; else echo "# par_trimmed_fastqc_html_1="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMED_FASTQC_HTML_2+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_HTML_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimmed_fastqc_html_2='&'#" ; else echo "# par_trimmed_fastqc_html_2="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMED_FASTQC_ZIP_1+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_ZIP_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimmed_fastqc_zip_1='&'#" ; else echo "# par_trimmed_fastqc_zip_1="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIMMED_FASTQC_ZIP_2+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_ZIP_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trimmed_fastqc_zip_2='&'#" ; else echo "# par_trimmed_fastqc_zip_2="; fi ) +$( if [ ! -z ${VIASH_PAR_UNPAIRED_R1+x} ]; then echo "${VIASH_PAR_UNPAIRED_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unpaired_r1='&'#" ; else echo "# par_unpaired_r1="; fi ) +$( if [ ! -z ${VIASH_PAR_UNPAIRED_R2+x} ]; then echo "${VIASH_PAR_UNPAIRED_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unpaired_r2='&'#" ; else echo "# par_unpaired_r2="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +[[ ! -d \\$output_dir ]] && mkdir -p \\$par_output_dir + +IFS=";" read -ra input <<< \\$par_input + +unset_if_false=( + par_phred33 + par_phred64 + par_fastqc + par_illumina + par_stranded_illumina + par_nextera + par_small_rna + par_gzip + par_dont_gzip + par_trim_n + par_no_report_file + par_suppress_warn + par_clock + par_polyA + par_implicon + par_rrbs + par_non_directional + par_keep + par_paired + par_retain_unpaired +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# Add FastQC file arguments to fastqc_args +fastqc_args="\\${par_fastqc_args}" +if [ -f "\\$par_fastqc_contaminants" ]; then + fastqc_args+=" --contaminants \\$par_fastqc_contaminants" +fi +if [ -f "\\$par_fastqc_adapters" ]; then + fastqc_args+=" --adapters \\$par_fastqc_adapters" +fi +if [ -f "\\$par_fastqc_limits" ]; then + fastqc_args+=" --limits \\$par_fastqc_limits" +fi + +trim_galore \\\\ + \\${par_quality:+-q "\\${par_quality}"} \\\\ + \\${par_phred33:+--phred33} \\\\ + \\${par_phred64:+--phred64 } \\\\ + \\${par_fastqc:+--fastqc } \\\\ + \\${fastqc_args:+--fastqc_args "\\${fastqc_args}"} \\\\ + \\${par_adapter:+-a "\\${par_adapter}"} \\\\ + \\${par_adapter2:+-a2 "\\${par_adapter2}"} \\\\ + \\${par_illumina:+--illumina} \\\\ + \\${par_stranded_illumina:+--stranded_illumina} \\\\ + \\${par_nextera:+--nextera} \\\\ + \\${par_small_rna:+--small_rna} \\\\ + \\${par_consider_already_trimmed:+--consider_already_trimmed "\\${par_consider_already_trimmed}"} \\\\ + \\${par_max_length:+--max_length "\\${par_max_length}"} \\\\ + \\${par_stringency:+--stringency "\\${par_stringency}"} \\\\ + \\${par_error_rate:+-e "\\${par_error_rate}"} \\\\ + \\${par_gzip:+--gzip} \\\\ + \\${par_dont_gzip:+--dont_gzip} \\\\ + \\${par_length:+--length "\\${par_length}"} \\\\ + \\${par_max_n:+--max_n "\\${par_max_n}"} \\\\ + \\${par_trim_n:+--trim-n "\\${par_trim_n}"} \\\\ + \\${par_no_report_file:+--no_report_file} \\\\ + \\${par_suppress_warn:+--suppress_warn} \\\\ + \\${par_clip_R1:+--clip_R1 "\\${par_clip_R1}"} \\\\ + \\${par_clip_R2:+--clip_R2 "\\${par_clip_R2}"} \\\\ + \\${par_three_prime_clip_R1:+--three_prime_clip_R1 "\\${par_three_prime_clip_R1}"} \\\\ + \\${par_three_prime_clip_R2:+--three_prime_clip_R2 "\\${par_three_prime_clip_R2}"} \\\\ + \\${par_nextseq:+--nextseq "\\${par_nextseq}"} \\\\ + \\${par_basename:+-basename "\\${par_basename}"} \\\\ + \\${par_hardtrim5:+--hardtrim5 "\\${par_hardtrim5}"} \\\\ + \\${par_hardtrim3:+--hardtrim3 "\\${par_hardtrim3}"} \\\\ + \\${par_clock:+--clock} \\\\ + \\${par_polyA:+--polyA} \\\\ + \\${par_implicon:+--implicon "\\${par_implicon}"} \\\\ + \\${par_rrbs:+--rrbs} \\\\ + \\${par_non_directional:+--non_directional} \\\\ + \\${par_keep:+--keep} \\\\ + \\${par_paired:+--paired} \\\\ + \\${par_retain_unpaired:+--retain_unpaired} \\\\ + \\${par_length_1:+-r1 "\\${par_length_1}"} \\\\ + \\${par_length_2:+-r2 "\\${par_length_2}"} \\\\ + \\${meta_cpus:+-j "\\${meta_cpus}"} \\\\ + -o \\$par_output_dir \\\\ + \\${input[*]} + +if [ \\$par_paired == "true" ]; then + + input_r1=\\$(basename -- "\\${input[0]}") + input_r2=\\$(basename -- "\\${input[1]}") + [[ ! -z "\\$par_trimmed_r1" ]] && mv \\$par_output_dir/*val_1.f*q* \\$par_trimmed_r1 + [[ ! -z "\\$par_trimmed_r2" ]] && mv \\$par_output_dir/*val_2.f*q* \\$par_trimmed_r2 + [[ ! -z "\\$par_trimming_report_r1" ]] && mv \\$par_output_dir/\\${input_r1}_trimming_report.txt \\$par_trimming_report_r1 + [[ ! -z "\\$par_trimming_report_r2" ]] && mv \\$par_output_dir/\\${input_r2}_trimming_report.txt \\$par_trimming_report_r2 + + if [ "\\$par_fastqc" == "true" ]; then + [[ ! -z "\\$par_trimmed_fastqc_html_1" ]] && mv \\$par_output_dir/*val_1_fastqc.html \\$par_trimmed_fastqc_html_1 + [[ ! -z "\\$par_trimmed_fastqc_html_2" ]] && mv \\$par_output_dir/*val_2_fastqc.html \\$par_trimmed_fastqc_html_2 + [[ ! -z "\\$par_trimmed_fastqc_zip_1" ]] && mv \\$par_output_dir/*val_1_fastqc.zip \\$par_trimmed_fastqc_zip_1 + [[ ! -z "\\$par_trimmed_fastqc_zip_2" ]] && mv \\$par_output_dir/*val_2_fastqc.zip \\$par_trimmed_fastqc_zip_2 + fi + + if [ "\\$par_retain_unpaired" == "true" ]; then + [[ ! -z "\\$par_unpaired_r1" ]] && mv \\$par_output_dir/*.unpaired_1.f*q* \\$par_unpaired_r1 + [[ ! -z "\\$par_unpaired_r2" ]] && mv \\$par_output_dir/*.unpaired_2.f*q* \\$par_unpaired_r2 + fi + +else + + input_r1=\\$(basename -- "\\${input[0]}") + [[ ! -z "\\$par_trimmed_r1" ]] && mv \\$par_output_dir/*_trimmed.fq* \\$par_trimmed_r1 + [[ ! -z "\\$par_trimming_report_r1" ]] && mv \\$par_output_dir/\\${input_r1}_trimming_report.txt \\$par_trimming_report_r1 + + if [ "\\$par_fastqc" == "true" ]; then + [[ ! -z "\\$par_trimmed_fastqc_html_1" ]] && mv \\$par_output_dir/*_trimmed_fastqc.html \\$par_trimmed_fastqc_html_1 + [[ ! -z "\\$par_trimmed_fastqc_zip_1" ]] && mv \\$par_output_dir/*_trimmed_fastqc.zip \\$par_trimmed_fastqc_zip_1 + fi + +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/trimgalore", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow.config new file mode 100644 index 0000000..32eac02 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'trimgalore' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files. \n' + author = 'Sai Nirmayi Yasa' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow_schema.json new file mode 100644 index 0000000..b022cc4 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/nextflow_schema.json @@ -0,0 +1,711 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "trimgalore", +"description": "A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files. \n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq`, multiple_sep: `\";\"`. Input files", + "help_text": "Type: List of `file`, required, example: `sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq`, multiple_sep: `\";\"`. Input files. Note that paired-end files need to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz" + + } + + +} +}, + + + "trimming options" : { + "title": "Trimming options", + "type": "object", + "description": "No description", + "properties": { + + + "quality": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. Trim low-quality ends (below the specified Phred score) from reads in addition to adapter removal", + "help_text": "Type: `integer`, example: `20`. Trim low-quality ends (below the specified Phred score) from reads in addition to adapter removal. For RRBS samples, quality trimming will be performed first, and adapter trimming is carried in a second round. Other files are quality and adapter trimmed in a single pass. The algorithm is the same as the one used by BWA (Subtract INT from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal)." + + } + + + , + "phred33": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1", + "help_text": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming." + , + "default":false + } + + + , + "phred64": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1", + "help_text": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming." + , + "default":false + } + + + , + "fastqc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Run FastQC in the default mode on the FastQ file once trimming is complete", + "help_text": "Type: `boolean_true`, default: `false`. Run FastQC in the default mode on the FastQ file once trimming is complete." + , + "default":false + } + + + , + "fastqc_args": { + "type": + "string", + "description": "Type: `string`, example: `--nogroup --noextract`. Passes extra arguments (excluding files) to FastQC", + "help_text": "Type: `string`, example: `--nogroup --noextract`. Passes extra arguments (excluding files) to FastQC. If more than one argument is to be passed to FastQC they must be in the form \"arg1 arg2 ...\". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately." + + } + + + , + "fastqc_contaminants": { + "type": + "string", + "description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list of contaminants for FastQC to screen overrepresented sequences against", + "help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list of contaminants for FastQC to screen overrepresented sequences against. The file must contain sets of named contaminants in the form name[tab]sequence. Lines prefixed with a hash will be ignored." + + } + + + , + "fastqc_adapters": { + "type": + "string", + "description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of adapter sequences which which FasstQC will explicity search against the library", + "help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of adapter sequences which which FasstQC will explicity search against the library. The file must contain sets of named adapters in the form name[tab]sequence. Lines prefixed with a hash will be ignored." + + } + + + , + "fastqc_limits": { + "type": + "string", + "description": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains a set of criteria which FastQC will use to determine the warn/error limits for the various modules", + "help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains a set of criteria which FastQC will use to determine the warn/error limits for the various modules. This file can also be used to selectively remove some modules from the output all together. The format needs to mirror the default limits.txt file found in the Configuration folder." + + } + + + , + "adapter": { + "type": + "string", + "description": "Type: `string`, example: `AGCTCCCG`. Adapter sequence to be trimmed", + "help_text": "Type: `string`, example: `AGCTCCCG`. Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. \nAt a special request, multiple adapters can also be specified like so: \n -a \" AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\", \nor so:\n -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"\nPotentially in conjucntion with the parameter \"-n 3\" to trim all adapters. \n" + + } + + + , + "adapter2": { + "type": + "string", + "description": "Type: `string`, example: `AGCTCCCG`. Optional adapter sequence to be trimmed off read 2 of paired-end files", + "help_text": "Type: `string`, example: `AGCTCCCG`. Optional adapter sequence to be trimmed off read 2 of paired-end files. This option requires \u0027--paired\u0027 to be specified as well. If the libraries to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5\u0027 adapter automatically (GATCGTCGGACT). A single base may also be given as e.g. -a2 A{10}, to be expanded to -a2 AAAAAAAAAA." + + } + + + , + "illumina": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter \u0027AGATCGGAAGAGC\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter \u0027AGATCGGAAGAGC\u0027 instead of the default auto-detection of adapter sequence." + , + "default":false + } + + + , + "stranded_illumina": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter \u0027ACTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter \u0027ACTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence." + , + "default":false + } + + + , + "nextera": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Nextera adapter \u0027CTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Nextera adapter \u0027CTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence." + , + "default":false + } + + + , + "small_rna": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3\u0027 Adapter \u0027TGGAATTCTCGG\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3\u0027 Adapter \u0027TGGAATTCTCGG\u0027 instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then a automatically (GATCGTCGGACT) unless -a 2 had been defined explicitly." + , + "default":false + } + + + , + "consider_already_trimmed": { + "type": + "integer", + "description": "Type: `integer`. During adapter auto-detection, the limit set by this argument allows the user to set a threshold up to which the file is considered already adapter-trimmed", + "help_text": "Type: `integer`. During adapter auto-detection, the limit set by this argument allows the user to set a threshold up to which the file is considered already adapter-trimmed. If no adapter sequence exceeds this threshold, no additional adapter trimming will be performed (technically, the adapter is set to \u0027-a X\u0027). Quality trimming is still performed as usual." + + } + + + , + "max_length": { + "type": + "integer", + "description": "Type: `integer`. Discard reads that are longer than the specified value after trimming", + "help_text": "Type: `integer`. Discard reads that are longer than the specified value after trimming. This is only advised for smallRNA sequencing to remove non-small RNA sequences." + + } + + + , + "stringency": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Overlap with adapter sequence required to trim a sequence", + "help_text": "Type: `integer`, example: `1`. Overlap with adapter sequence required to trim a sequence. Defaults to a very stringent setting of 1, i.e. even a single bp of overlapping sequence will be trimmed off from the 3\u0027 end of any read." + + } + + + , + "error_rate": { + "type": + "number", + "description": "Type: `double`, example: `0.1`. Maximum allowed error rate (no", + "help_text": "Type: `double`, example: `0.1`. Maximum allowed error rate (no. of errors divided by the length of the matching region)" + + } + + + , + "gzip": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Compress the output file with GZIP", + "help_text": "Type: `boolean_true`, default: `false`. Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly." + , + "default":false + } + + + , + "dont_gzip": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output files won\u0027t be compressed with GZIP", + "help_text": "Type: `boolean_true`, default: `false`. Output files won\u0027t be compressed with GZIP. This option overrides --gzip." + , + "default":false + } + + + , + "length": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. Discard reads that became shorter than the specified length because of either quality or adapter trimming", + "help_text": "Type: `integer`, example: `20`. Discard reads that became shorter than the specified length because of either quality or adapter trimming. A value of \u00270\u0027 effectively disables this behaviour. For paired-end files, both reads of a read-pair need to be longer than the specified length to be printed out to validated paired-end files. If only one read became too short there is the possibility of keeping such unpaired single-end reads using the --retain_unpaired option." + + } + + + , + "max_n": { + "type": + "integer", + "description": "Type: `integer`. The total number of Ns a read may contain before it will be removed altogether", + "help_text": "Type: `integer`. The total number of Ns a read may contain before it will be removed altogether.In a paired-end setting, either read exceeding this limit will result in the entire pair being removed from the trimmed output files. If COUNT is a number between 0 and 1, it is interpreted as a fraction of the read length." + + } + + + , + "trim_n": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Removes Ns from either side of the read", + "help_text": "Type: `boolean_true`, default: `false`. Removes Ns from either side of the read. This option does currently not work in RRBS mode." + , + "default":false + } + + + , + "no_report_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If specified no report file will be generated", + "help_text": "Type: `boolean_true`, default: `false`. If specified no report file will be generated." + , + "default":false + } + + + , + "suppress_warn": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If specified any output to STDOUT or STDERR will be suppressed", + "help_text": "Type: `boolean_true`, default: `false`. If specified any output to STDOUT or STDERR will be suppressed." + , + "default":false + } + + + , + "clip_R1": { + "type": + "integer", + "description": "Type: `integer`. Instructs TrimGalore to remove given number of bp from the 5\u0027 end of read 1 (or single-end reads)", + "help_text": "Type: `integer`. Instructs TrimGalore to remove given number of bp from the 5\u0027 end of read 1 (or single-end reads). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5\u0027 end." + + } + + + , + "clip_R2": { + "type": + "integer", + "description": "Type: `integer`. Instructs TrimGalore to remove given number bp from the 5\u0027 end of read 2 (paired-end reads only)", + "help_text": "Type: `integer`. Instructs TrimGalore to remove given number bp from the 5\u0027 end of read 2 (paired-end reads only). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5\u0027 end. For paired-end BS-Seq, it is recommended to remove the first few bp because the end-repair reaction may introduce a bias towards low methylation." + + } + + + , + "three_prime_clip_R1": { + "type": + "integer", + "description": "Type: `integer`. Instructs Trim Galore to remove spacified number of bp from the 3\u0027 end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed", + "help_text": "Type: `integer`. Instructs Trim Galore to remove spacified number of bp from the 3\u0027 end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. This may remove some bias from the 3\u0027 end that is not directly related to adapter sequence or basecall quality." + + } + + + , + "three_prime_clip_R2": { + "type": + "integer", + "description": "Type: `integer`. Instructs Trim Galore to remove \u003cint\u003e bp from the 3\u0027 end of read 2 AFTER adapter/quality trimming has been performed", + "help_text": "Type: `integer`. Instructs Trim Galore to remove \u003cint\u003e bp from the 3\u0027 end of read 2 AFTER adapter/quality trimming has been performed. This may remove some unwanted bias from the 3\u0027 end that is not directly related to adapter sequence or basecall quality." + + } + + + , + "nextseq": { + "type": + "integer", + "description": "Type: `integer`. This enables the option \u0027--nextseq-trim=3\u0027CUTOFF\u0027 within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored", + "help_text": "Type: `integer`. This enables the option \u0027--nextseq-trim=3\u0027CUTOFF\u0027 within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. This is mutually exlusive with \u0027-q INT\u0027." + + } + + + , + "basename": { + "type": + "string", + "description": "Type: `string`. Use specified name (PREFERRED_NAME) as the basename for output files, instead of deriving the filenames from the input files", + "help_text": "Type: `string`. Use specified name (PREFERRED_NAME) as the basename for output files, instead of deriving the filenames from the input files. Single-end data would be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + + } + + +} +}, + + + "specific trimming options without adapter/quality trimming" : { + "title": "Specific trimming options without adapter/quality trimming", + "type": "object", + "description": "No description", + "properties": { + + + "hardtrim5": { + "type": + "integer", + "description": "Type: `integer`. Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to \u003cint\u003e bp at the 5\u0027-end", + "help_text": "Type: `integer`. Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to \u003cint\u003e bp at the 5\u0027-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in .\u003cint\u003e_5prime.fq(.gz)." + + } + + + , + "hardtrim3": { + "type": + "integer", + "description": "Type: `integer`. Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to \u003cint\u003e bp at the 3\u0027-end", + "help_text": "Type: `integer`. Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to \u003cint\u003e bp at the 3\u0027-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in .\u003cint\u003e_3prime.fq(.gz)." + + } + + + , + "clock": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock", + "help_text": "Type: `boolean_true`, default: `false`. In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock." + , + "default":false + } + + + , + "polyA": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences", + "help_text": "Type: `boolean_true`, default: `false`. This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either \u0027AAAAAAAAAA\u0027 or \u0027TTTTTTTTTT\u0027. This determines if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3\u0027-end trimming) and T{150} for Read2 (5\u0027-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start (\"32:A:\") and end (\"_PolyA:32\") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality before looking for Poly-A tails, and it is the user\u0027s responsibility to carry out an initial round of trimming." + , + "default":false + } + + + , + "implicon": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads", + "help_text": "Type: `boolean_true`, default: `false`. This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In it\u0027s current implementation, the UMI carrying reads come in the following format\n Read 1 5\u0027 FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3\u0027\n Read 2 3\u0027 UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5\u0027\nWhere UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.\n" + , + "default":false + } + + +} +}, + + + "rrbs-specific options" : { + "title": "RRBS-specific options", + "type": "object", + "description": "No description", + "properties": { + + + "rrbs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG)", + "help_text": "Type: `boolean_true`, default: `false`. Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3\u0027 end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5\u0027 end (by setting \u0027--clip_r2 2\u0027). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3\u0027 MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)." + , + "default":false + } + + + , + "non_directional": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for \u0027CAA\u0027 or \u0027CGA\u0027 at the start of the read and, if found, removes the first two basepairs", + "help_text": "Type: `boolean_true`, default: `false`. Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for \u0027CAA\u0027 or \u0027CGA\u0027 at the start of the read and, if found, removes the first two basepairs. Like with the option \u0027--rrbs\u0027 this avoids using cytosine positions that were filled-in during the end-repair step. \u0027--non_directional\u0027 requires \u0027--rrbs\u0027 to be specified as well. Note that this option does not set \u0027--clip_r2 2\u0027 in paired-end mode." + , + "default":false + } + + + , + "keep": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Keep the quality trimmed intermediate file", + "help_text": "Type: `boolean_true`, default: `false`. Keep the quality trimmed intermediate file." + , + "default":false + } + + +} +}, + + + "paired-end specific options" : { + "title": "Paired-end specific options", + "type": "object", + "description": "No description", + "properties": { + + + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files", + "help_text": "Type: `boolean_true`, default: `false`. This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... ." + , + "default":false + } + + + , + "retain_unpaired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If only one of the two paired-end reads became too short, the longer read will be written to either \u0027", + "help_text": "Type: `boolean_true`, default: `false`. If only one of the two paired-end reads became too short, the longer read will be written to either \u0027.unpaired_1.fq\u0027 or \u0027.unpaired_2.fq\u0027 output files. The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2." + , + "default":false + } + + + , + "length_1": { + "type": + "integer", + "description": "Type: `integer`, example: `35`. Unpaired single-end read length cutoff needed for read 1 to be written to \u0027", + "help_text": "Type: `integer`, example: `35`. Unpaired single-end read length cutoff needed for read 1 to be written to \u0027.unpaired_1.fq\u0027 output file. These reads may be mapped in single-end mode." + + } + + + , + "length_2": { + "type": + "integer", + "description": "Type: `integer`, example: `35`. Unpaired single-end read length cutoff needed for read 2 to be written to \u0027", + "help_text": "Type: `integer`, example: `35`. Unpaired single-end read length cutoff needed for read 2 to be written to \u0027.unpaired_2.fq\u0027 output file. These reads may be mapped in single-end mode." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_dir": { + "type": + "string", + "description": "Type: `file`, required, default: `trimmed_output`. If specified all output will be written to this directory instead of the current directory", + "help_text": "Type: `file`, required, default: `trimmed_output`. If specified all output will be written to this directory instead of the current directory." + , + "default":"trimmed_output" + } + + + , + "trimmed_r1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimmed_r1.fastq`, example: `read_1.fastq`. Output file for read 1", + "help_text": "Type: `file`, default: `$id.$key.trimmed_r1.fastq`, example: `read_1.fastq`. Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimmed_r1.fastq" + } + + + , + "trimmed_r2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimmed_r2.fastq`, example: `read_2.fastq`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.trimmed_r2.fastq`, example: `read_2.fastq`. Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimmed_r2.fastq" + } + + + , + "trimming_report_r1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimming_report_r1.txt`, example: `read_1.trimming_report.txt`. Trimming report for read 1", + "help_text": "Type: `file`, default: `$id.$key.trimming_report_r1.txt`, example: `read_1.trimming_report.txt`. Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimming_report_r1.txt" + } + + + , + "trimming_report_r2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimming_report_r2.txt`, example: `read_2.trimming_report.txt`. Trimming report for read 1", + "help_text": "Type: `file`, default: `$id.$key.trimming_report_r2.txt`, example: `read_2.trimming_report.txt`. Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimming_report_r2.txt" + } + + + , + "trimmed_fastqc_html_1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_1.html`, example: `read_1.fastqc.html`. FastQC report for trimmed (single-end) reads (or read 1 for paired-end)", + "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_1.html`, example: `read_1.fastqc.html`. FastQC report for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimmed_fastqc_html_1.html" + } + + + , + "trimmed_fastqc_html_2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_2.html`, example: `read_2.fastqc.html`. FastQC report for trimmed reads (read2 for paired-end)", + "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_2.html`, example: `read_2.fastqc.html`. FastQC report for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimmed_fastqc_html_2.html" + } + + + , + "trimmed_fastqc_zip_1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_1.zip`, example: `read_1.fastqc.zip`. FastQC results for trimmed (single-end) reads (or read 1 for paired-end)", + "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_1.zip`, example: `read_1.fastqc.zip`. FastQC results for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimmed_fastqc_zip_1.zip" + } + + + , + "trimmed_fastqc_zip_2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_2.zip`, example: `read_2.fastqc.zip`. FastQC results for trimmed reads (read2 for paired-end)", + "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_2.zip`, example: `read_2.fastqc.zip`. FastQC results for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.trimmed_fastqc_zip_2.zip" + } + + + , + "unpaired_r1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unpaired_r1.fastq`, example: `unpaired_read_1.fastq`. Output file for unpired read 1", + "help_text": "Type: `file`, default: `$id.$key.unpaired_r1.fastq`, example: `unpaired_read_1.fastq`. Output file for unpired read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.unpaired_r1.fastq" + } + + + , + "unpaired_r2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unpaired_r2.fastq`, example: `unpaired_read_2.fastq`. Output file for unpaired read 2", + "help_text": "Type: `file`, default: `$id.$key.unpaired_r2.fastq`, example: `unpaired_read_2.fastq`. Output file for unpaired read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + , + "default":"$id.$key.unpaired_r2.fastq" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/trimming options" + }, + + { + "$ref": "#/definitions/specific trimming options without adapter/quality trimming" + }, + + { + "$ref": "#/definitions/rrbs-specific options" + }, + + { + "$ref": "#/definitions/paired-end specific options" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml new file mode 100644 index 0000000..af03844 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml @@ -0,0 +1,652 @@ +name: "umi_tools_dedup" +namespace: "umi_tools" +version: "v0.3.1" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--stdin" + description: "Input BAM or SAM file. Use --in_sam to specify SAM format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--in_sam" + description: "By default, inputs are assumed to be in BAM format. Use this options\ + \ to specify the use of SAM\nformat for input.\n" + info: null + direction: "input" + - type: "file" + name: "--bai" + description: "BAM index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--random_seed" + description: "Random seed to initialize number generator with." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "--stdout" + description: "Deduplicated BAM file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--out_sam" + description: "By default, outputa are written in BAM format. Use this options\ + \ to specify the use of SAM format\nfor output.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--paired" + description: "BAM is paired end - output both read pairs. This will also force\ + \ the use of the template length\nto determine reads with the same mapping coordinates.\n" + info: null + direction: "input" + - type: "string" + name: "--output_stats" + description: "Generate files containing UMI based deduplication statistics files\ + \ with this prefix in the file names.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extract_umi_method" + description: "Specify the method by which the barcodes were encoded in the read.\n\ + The options are:\n * read_id (default) \n * tag\n * umis\n" + info: null + example: + - "read_id" + required: false + choices: + - "read_id" + - "tag" + - "umis" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_tag" + description: "The tag containing the UMI sequence. This is only required if the\ + \ extract_umi_method is set to tag.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_separator" + description: "The separator used to separate the UMI from the read sequence. This\ + \ is only required if the\nextract_umi_method is set to id_read. Default: `_`.\n" + info: null + example: + - "_" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_tag_split" + description: "Separate the UMI in tag by and take the first element." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_tag_delimiter" + description: "Separate the UMI in by and concatenate the elements." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--cell_tag" + description: "The tag containing the cell barcode sequence. This is only required\ + \ if the extract_umi_method\nis set to tag.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--cell_tag_split" + description: "Separate the cell barcode in tag by and take the first element." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--cell_tag_delimiter" + description: "Separate the cell barcode in by and concatenate the\ + \ elements." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Grouping Options" + arguments: + - type: "string" + name: "--method" + description: "The method to use for grouping reads. \nThe options are: \n * unique\n\ + \ * percentile\n * cluster\n * adjacency\n * directional (default)\n" + info: null + example: + - "directional" + required: false + choices: + - "unique" + - "percentile" + - "cluster" + - "adjacency" + - "directional" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--edit_distance_threshold" + description: "For the adjacency and cluster methods the threshold for the edit\ + \ distance to connect two\nUMIs in the network can be increased. The default\ + \ value of 1 works best unless the UMI is\nvery long (>14bp). Default: `1`.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--spliced_is_unique" + description: "Causes two reads that start in the same position on the same strand\ + \ and having the same UMI\nto be considered unique if one is spliced and the\ + \ other is not. (Uses the 'N' cigar operation\nto test for splicing).\n" + info: null + direction: "input" + - type: "integer" + name: "--soft_clip_threshold" + description: "Mappers that soft clip will sometimes do so rather than mapping\ + \ a spliced read if there is only\na small overhang over the exon junction.\ + \ By setting this option, you can treat reads with at\nleast this many bases\ + \ soft-clipped at the 3' end as spliced. Default: `4`.\n" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--multimapping_detection_method" + description: "If the sam/bam contains tags to identify multimapping reads, you\ + \ can specify for use when selecting\nthe best read at a given loci. Supported\ + \ tags are `NH`, `X0` and `XT`. If not specified, the read\nwith the highest\ + \ mapping quality will be selected.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--read_length" + description: "Use the read length as a criteria when deduping, for e.g. sRNA-Seq." + info: null + direction: "input" +- name: "Single-cell RNA-Seq Options" + arguments: + - type: "boolean_true" + name: "--per_gene" + description: "Reads will be grouped together if they have the same gene. This\ + \ is useful if your library prep\ngenerates PCR duplicates with non identical\ + \ alignment positions such as CEL-Seq. Note this option\nis hardcoded to be\ + \ on with the count command. I.e. counting is always performed per-gene. Must\ + \ be\ncombined with either --gene_tag or --per_contig option.\n" + info: null + direction: "input" + - type: "string" + name: "--gene_tag" + description: "Deduplicate per gene. The gene information is encoded in the bam\ + \ read tag specified.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--assigned_status_tag" + description: "BAM tag which describes whether a read is assigned to a gene. Defaults\ + \ to the same value as given\nfor --gene_tag.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--skip_tags_regex" + description: "Use in conjunction with the --assigned_status_tag option to skip\ + \ any reads where the tag matches\nthis regex. Default (\"^[__|Unassigned]\"\ + ) matches anything which starts with \"__\" or \"Unassigned\".\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--per_contig" + description: "Deduplicate per contig (field 3 in BAM; RNAME). All reads with the\ + \ sam contig will be considered to\nhave the same alignment position. This is\ + \ useful if you have aligned to a reference transcriptome\nwith one transcript\ + \ per gene. If you have aligned to a transcriptome with more than one transcript\n\ + per gene, you can supply a map between transcripts and gene using the --gene_transcript_map\ + \ option.\n" + info: null + direction: "input" + - type: "file" + name: "--gene_transcript_map" + description: "A file containing a mapping between gene names and transcript names.\ + \ The file should be tab\nseparated with the gene name in the first column and\ + \ the transcript name in the second column.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--per_cell" + description: "Reads will only be grouped together if they have the same cell barcode.\ + \ Can be combined with\n--per_gene.\n" + info: null + direction: "input" +- name: "SAM/BAM Options" + arguments: + - type: "integer" + name: "--mapping_quality" + description: "Minimium mapping quality (MAPQ) for a read to be retained. Default:\ + \ `0`.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--unmapped_reads" + description: "How unmapped reads should be handled. \nThe options are:\n * \"\ + discard\": Discard all unmapped reads. (default)\n * \"use\": If read2\ + \ is unmapped, deduplicate using read1 only. Requires --paired.\n * \"output\"\ + : Output unmapped reads/read pairs without UMI grouping/deduplication. Only\ + \ available in umi_tools group.\n" + info: null + example: + - "discard" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--chimeric_pairs" + description: "How chimeric pairs should be handled. \nThe options are:\n * \"\ + discard\": Discard all chimeric read pairs.\n * \"use\": Deduplicate using\ + \ read1 only. (default)\n * \"output\": Output chimeric pairs without UMI\ + \ grouping/deduplication. Only available in\n umi_tools group.\n" + info: null + example: + - "use" + required: false + choices: + - "discard" + - "use" + - "output" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--unpaired_reads" + description: "How unpaired reads should be handled. \nThe options are: \n * \"\ + discard\": Discard all unmapped reads.\n * \"use\": If read2 is unmapped, deduplicate\ + \ using read1 only. Requires --paired. (default)\n * \"output\": Output unmapped\ + \ reads/read pairs without UMI grouping/deduplication. Only available\n \ + \ in umi_tools group.\n" + info: null + example: + - "use" + required: false + choices: + - "discard" + - "use" + - "output" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--ignore_umi" + description: "Ignore the UMI and group reads using mapping coordinates only." + info: null + direction: "input" + - type: "double" + name: "--subset" + description: "Only consider a fraction of the reads, chosen at random. This is\ + \ useful for doing saturation\nanalyses.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--chrom" + description: "Only consider a single chromosome. This is useful for debugging/testing\ + \ purposes." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Group/Dedup Options" + arguments: + - type: "boolean_true" + name: "--no_sort_output" + description: "By default, output is sorted. This involves the use of a temporary\ + \ unsorted file (saved in\n--temp_dir). Use this option to turn off sorting.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--buffer_whole_contig" + description: "Forces dedup to parse an entire contig before yielding any reads\ + \ for deduplication. This is the\nonly way to absolutely guarantee that all\ + \ reads with the same start position are grouped together\nfor deduplication\ + \ since dedup uses the start position of the read, not the alignment coordinate\ + \ on\nwhich the reads are sorted. However, by default, dedup reads for another\ + \ 1000bp before outputting\nread groups which will avoid any reads being missed\ + \ with short read sequencing (<1000bp).\n" + info: null + direction: "input" +- name: "Common Options" + arguments: + - type: "file" + name: "--log" + alternatives: + - "-L" + description: "File with logging information." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--log2stderr" + description: "Send logging information to stderr." + info: null + direction: "input" + - type: "integer" + name: "--verbose" + alternatives: + - "-v" + description: "Log level. The higher, the more output. Default: `0`.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--error" + alternatives: + - "-E" + description: "File with error information." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--temp_dir" + description: "Directory for temporary files. If not set, the bash environmental\ + \ variable TMPDIR is used.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--compresslevel" + description: "Level of Gzip compression to use. Default=6 matches GNU gzip rather\ + \ than python gzip default.\nDefault: `6`.\n" + info: null + example: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--timeit" + description: "Store timing information in file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--timeit_name" + description: "Name in timing file for this class of jobs. Default: `all`.\n" + info: null + example: + - "all" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--timeit_header" + description: "Add header for timing information." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Deduplicate reads based on the mapping co-ordinate and the UMI attached\ + \ to the read.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "umi_tools" +- "deduplication" +- "dedup" +license: "MIT" +references: + doi: + - "10.1101/gr.209601.116" +links: + repository: "https://github.com/CGATOxford/UMI-tools" + homepage: "https://umi-tools.readthedocs.io/en/latest/" + documentation: "https://umi-tools.readthedocs.io/en/latest/reference/dedup.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/umi_tools/umi_tools_dedup/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/umi_tools/umi_tools_dedup" + executable: "target/nextflow/umi_tools/umi_tools_dedup/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/main.nf new file mode 100644 index 0000000..626db55 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/main.nf @@ -0,0 +1,4409 @@ +// umi_tools_dedup v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "umi_tools_dedup", + "namespace" : "umi_tools", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "--stdin" + ], + "description" : "Input BAM or SAM file. Use --in_sam to specify SAM format.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--in_sam", + "description" : "By default, inputs are assumed to be in BAM format. Use this options to specify the use of SAM\nformat for input.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--bai", + "description" : "BAM index", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--random_seed", + "description" : "Random seed to initialize number generator with.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--stdout" + ], + "description" : "Deduplicated BAM file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--out_sam", + "description" : "By default, outputa are written in BAM format. Use this options to specify the use of SAM format\nfor output.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "BAM is paired end - output both read pairs. This will also force the use of the template length\nto determine reads with the same mapping coordinates.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--output_stats", + "description" : "Generate files containing UMI based deduplication statistics files with this prefix in the file names.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extract_umi_method", + "description" : "Specify the method by which the barcodes were encoded in the read.\nThe options are:\n * read_id (default) \n * tag\n * umis\n", + "example" : [ + "read_id" + ], + "required" : false, + "choices" : [ + "read_id", + "tag", + "umis" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_tag", + "description" : "The tag containing the UMI sequence. This is only required if the extract_umi_method is set to tag.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_separator", + "description" : "The separator used to separate the UMI from the read sequence. This is only required if the\nextract_umi_method is set to id_read. Default: `_`.\n", + "example" : [ + "_" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_tag_split", + "description" : "Separate the UMI in tag by and take the first element.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_tag_delimiter", + "description" : "Separate the UMI in by and concatenate the elements.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--cell_tag", + "description" : "The tag containing the cell barcode sequence. This is only required if the extract_umi_method\nis set to tag.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--cell_tag_split", + "description" : "Separate the cell barcode in tag by and take the first element.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--cell_tag_delimiter", + "description" : "Separate the cell barcode in by and concatenate the elements.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Grouping Options", + "arguments" : [ + { + "type" : "string", + "name" : "--method", + "description" : "The method to use for grouping reads. \nThe options are: \n * unique\n * percentile\n * cluster\n * adjacency\n * directional (default)\n", + "example" : [ + "directional" + ], + "required" : false, + "choices" : [ + "unique", + "percentile", + "cluster", + "adjacency", + "directional" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--edit_distance_threshold", + "description" : "For the adjacency and cluster methods the threshold for the edit distance to connect two\nUMIs in the network can be increased. The default value of 1 works best unless the UMI is\nvery long (>14bp). Default: `1`.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--spliced_is_unique", + "description" : "Causes two reads that start in the same position on the same strand and having the same UMI\nto be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation\nto test for splicing).\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--soft_clip_threshold", + "description" : "Mappers that soft clip will sometimes do so rather than mapping a spliced read if there is only\na small overhang over the exon junction. By setting this option, you can treat reads with at\nleast this many bases soft-clipped at the 3' end as spliced. Default: `4`.\n", + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--multimapping_detection_method", + "description" : "If the sam/bam contains tags to identify multimapping reads, you can specify for use when selecting\nthe best read at a given loci. Supported tags are `NH`, `X0` and `XT`. If not specified, the read\nwith the highest mapping quality will be selected.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--read_length", + "description" : "Use the read length as a criteria when deduping, for e.g. sRNA-Seq.", + "direction" : "input" + } + ] + }, + { + "name" : "Single-cell RNA-Seq Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--per_gene", + "description" : "Reads will be grouped together if they have the same gene. This is useful if your library prep\ngenerates PCR duplicates with non identical alignment positions such as CEL-Seq. Note this option\nis hardcoded to be on with the count command. I.e. counting is always performed per-gene. Must be\ncombined with either --gene_tag or --per_contig option.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--gene_tag", + "description" : "Deduplicate per gene. The gene information is encoded in the bam read tag specified.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--assigned_status_tag", + "description" : "BAM tag which describes whether a read is assigned to a gene. Defaults to the same value as given\nfor --gene_tag.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--skip_tags_regex", + "description" : "Use in conjunction with the --assigned_status_tag option to skip any reads where the tag matches\nthis regex. Default (\\"^[__|Unassigned]\\") matches anything which starts with \\"__\\" or \\"Unassigned\\".\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--per_contig", + "description" : "Deduplicate per contig (field 3 in BAM; RNAME). All reads with the sam contig will be considered to\nhave the same alignment position. This is useful if you have aligned to a reference transcriptome\nwith one transcript per gene. If you have aligned to a transcriptome with more than one transcript\nper gene, you can supply a map between transcripts and gene using the --gene_transcript_map option.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--gene_transcript_map", + "description" : "A file containing a mapping between gene names and transcript names. The file should be tab\nseparated with the gene name in the first column and the transcript name in the second column.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--per_cell", + "description" : "Reads will only be grouped together if they have the same cell barcode. Can be combined with\n--per_gene.\n", + "direction" : "input" + } + ] + }, + { + "name" : "SAM/BAM Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--mapping_quality", + "description" : "Minimium mapping quality (MAPQ) for a read to be retained. Default: `0`.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--unmapped_reads", + "description" : "How unmapped reads should be handled. \nThe options are:\n * \\"discard\\": Discard all unmapped reads. (default)\n * \\"use\\": If read2 is unmapped, deduplicate using read1 only. Requires --paired.\n * \\"output\\": Output unmapped reads/read pairs without UMI grouping/deduplication. Only available in umi_tools group.\n", + "example" : [ + "discard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--chimeric_pairs", + "description" : "How chimeric pairs should be handled. \nThe options are:\n * \\"discard\\": Discard all chimeric read pairs.\n * \\"use\\": Deduplicate using read1 only. (default)\n * \\"output\\": Output chimeric pairs without UMI grouping/deduplication. Only available in\n umi_tools group.\n", + "example" : [ + "use" + ], + "required" : false, + "choices" : [ + "discard", + "use", + "output" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--unpaired_reads", + "description" : "How unpaired reads should be handled. \nThe options are: \n * \\"discard\\": Discard all unmapped reads.\n * \\"use\\": If read2 is unmapped, deduplicate using read1 only. Requires --paired. (default)\n * \\"output\\": Output unmapped reads/read pairs without UMI grouping/deduplication. Only available\n in umi_tools group.\n", + "example" : [ + "use" + ], + "required" : false, + "choices" : [ + "discard", + "use", + "output" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--ignore_umi", + "description" : "Ignore the UMI and group reads using mapping coordinates only.", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--subset", + "description" : "Only consider a fraction of the reads, chosen at random. This is useful for doing saturation\nanalyses.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--chrom", + "description" : "Only consider a single chromosome. This is useful for debugging/testing purposes.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Group/Dedup Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--no_sort_output", + "description" : "By default, output is sorted. This involves the use of a temporary unsorted file (saved in\n--temp_dir). Use this option to turn off sorting.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--buffer_whole_contig", + "description" : "Forces dedup to parse an entire contig before yielding any reads for deduplication. This is the\nonly way to absolutely guarantee that all reads with the same start position are grouped together\nfor deduplication since dedup uses the start position of the read, not the alignment coordinate on\nwhich the reads are sorted. However, by default, dedup reads for another 1000bp before outputting\nread groups which will avoid any reads being missed with short read sequencing (<1000bp).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Common Options", + "arguments" : [ + { + "type" : "file", + "name" : "--log", + "alternatives" : [ + "-L" + ], + "description" : "File with logging information.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--log2stderr", + "description" : "Send logging information to stderr.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--verbose", + "alternatives" : [ + "-v" + ], + "description" : "Log level. The higher, the more output. Default: `0`.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--error", + "alternatives" : [ + "-E" + ], + "description" : "File with error information.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--temp_dir", + "description" : "Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--compresslevel", + "description" : "Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default.\nDefault: `6`.\n", + "example" : [ + 6 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--timeit", + "description" : "Store timing information in file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--timeit_name", + "description" : "Name in timing file for this class of jobs. Default: `all`.\n", + "example" : [ + "all" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--timeit_header", + "description" : "Add header for timing information.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "umi_tools", + "deduplication", + "dedup" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1101/gr.209601.116" + ] + }, + "links" : { + "repository" : "https://github.com/CGATOxford/UMI-tools", + "homepage" : "https://umi-tools.readthedocs.io/en/latest/", + "documentation" : "https://umi-tools.readthedocs.io/en/latest/reference/dedup.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/umi_tools/umi_tools_dedup/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/umi_tools/umi_tools_dedup", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_IN_SAM+x} ]; then echo "${VIASH_PAR_IN_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_in_sam='&'#" ; else echo "# par_in_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_RANDOM_SEED+x} ]; then echo "${VIASH_PAR_RANDOM_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_random_seed='&'#" ; else echo "# par_random_seed="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUT_SAM+x} ]; then echo "${VIASH_PAR_OUT_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_out_sam='&'#" ; else echo "# par_out_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_OUTPUT_STATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_stats='&'#" ; else echo "# par_output_stats="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRACT_UMI_METHOD+x} ]; then echo "${VIASH_PAR_EXTRACT_UMI_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extract_umi_method='&'#" ; else echo "# par_extract_umi_method="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_TAG+x} ]; then echo "${VIASH_PAR_UMI_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_tag='&'#" ; else echo "# par_umi_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_SEPARATOR+x} ]; then echo "${VIASH_PAR_UMI_SEPARATOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_separator='&'#" ; else echo "# par_umi_separator="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_TAG_SPLIT+x} ]; then echo "${VIASH_PAR_UMI_TAG_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_tag_split='&'#" ; else echo "# par_umi_tag_split="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_TAG_DELIMITER+x} ]; then echo "${VIASH_PAR_UMI_TAG_DELIMITER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_tag_delimiter='&'#" ; else echo "# par_umi_tag_delimiter="; fi ) +$( if [ ! -z ${VIASH_PAR_CELL_TAG+x} ]; then echo "${VIASH_PAR_CELL_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cell_tag='&'#" ; else echo "# par_cell_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_CELL_TAG_SPLIT+x} ]; then echo "${VIASH_PAR_CELL_TAG_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cell_tag_split='&'#" ; else echo "# par_cell_tag_split="; fi ) +$( if [ ! -z ${VIASH_PAR_CELL_TAG_DELIMITER+x} ]; then echo "${VIASH_PAR_CELL_TAG_DELIMITER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cell_tag_delimiter='&'#" ; else echo "# par_cell_tag_delimiter="; fi ) +$( if [ ! -z ${VIASH_PAR_METHOD+x} ]; then echo "${VIASH_PAR_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_method='&'#" ; else echo "# par_method="; fi ) +$( if [ ! -z ${VIASH_PAR_EDIT_DISTANCE_THRESHOLD+x} ]; then echo "${VIASH_PAR_EDIT_DISTANCE_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_edit_distance_threshold='&'#" ; else echo "# par_edit_distance_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLICED_IS_UNIQUE+x} ]; then echo "${VIASH_PAR_SPLICED_IS_UNIQUE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_spliced_is_unique='&'#" ; else echo "# par_spliced_is_unique="; fi ) +$( if [ ! -z ${VIASH_PAR_SOFT_CLIP_THRESHOLD+x} ]; then echo "${VIASH_PAR_SOFT_CLIP_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_soft_clip_threshold='&'#" ; else echo "# par_soft_clip_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_MULTIMAPPING_DETECTION_METHOD+x} ]; then echo "${VIASH_PAR_MULTIMAPPING_DETECTION_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_multimapping_detection_method='&'#" ; else echo "# par_multimapping_detection_method="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_LENGTH+x} ]; then echo "${VIASH_PAR_READ_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_length='&'#" ; else echo "# par_read_length="; fi ) +$( if [ ! -z ${VIASH_PAR_PER_GENE+x} ]; then echo "${VIASH_PAR_PER_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_gene='&'#" ; else echo "# par_per_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_GENE_TAG+x} ]; then echo "${VIASH_PAR_GENE_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gene_tag='&'#" ; else echo "# par_gene_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_ASSIGNED_STATUS_TAG+x} ]; then echo "${VIASH_PAR_ASSIGNED_STATUS_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_assigned_status_tag='&'#" ; else echo "# par_assigned_status_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_SKIP_TAGS_REGEX+x} ]; then echo "${VIASH_PAR_SKIP_TAGS_REGEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_skip_tags_regex='&'#" ; else echo "# par_skip_tags_regex="; fi ) +$( if [ ! -z ${VIASH_PAR_PER_CONTIG+x} ]; then echo "${VIASH_PAR_PER_CONTIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_contig='&'#" ; else echo "# par_per_contig="; fi ) +$( if [ ! -z ${VIASH_PAR_GENE_TRANSCRIPT_MAP+x} ]; then echo "${VIASH_PAR_GENE_TRANSCRIPT_MAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gene_transcript_map='&'#" ; else echo "# par_gene_transcript_map="; fi ) +$( if [ ! -z ${VIASH_PAR_PER_CELL+x} ]; then echo "${VIASH_PAR_PER_CELL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_cell='&'#" ; else echo "# par_per_cell="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPPING_QUALITY+x} ]; then echo "${VIASH_PAR_MAPPING_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapping_quality='&'#" ; else echo "# par_mapping_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_UNMAPPED_READS+x} ]; then echo "${VIASH_PAR_UNMAPPED_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unmapped_reads='&'#" ; else echo "# par_unmapped_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIMERIC_PAIRS+x} ]; then echo "${VIASH_PAR_CHIMERIC_PAIRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chimeric_pairs='&'#" ; else echo "# par_chimeric_pairs="; fi ) +$( if [ ! -z ${VIASH_PAR_UNPAIRED_READS+x} ]; then echo "${VIASH_PAR_UNPAIRED_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unpaired_reads='&'#" ; else echo "# par_unpaired_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_UMI+x} ]; then echo "${VIASH_PAR_IGNORE_UMI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_umi='&'#" ; else echo "# par_ignore_umi="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBSET+x} ]; then echo "${VIASH_PAR_SUBSET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subset='&'#" ; else echo "# par_subset="; fi ) +$( if [ ! -z ${VIASH_PAR_CHROM+x} ]; then echo "${VIASH_PAR_CHROM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chrom='&'#" ; else echo "# par_chrom="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_SORT_OUTPUT+x} ]; then echo "${VIASH_PAR_NO_SORT_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_sort_output='&'#" ; else echo "# par_no_sort_output="; fi ) +$( if [ ! -z ${VIASH_PAR_BUFFER_WHOLE_CONTIG+x} ]; then echo "${VIASH_PAR_BUFFER_WHOLE_CONTIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_buffer_whole_contig='&'#" ; else echo "# par_buffer_whole_contig="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG2STDERR+x} ]; then echo "${VIASH_PAR_LOG2STDERR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log2stderr='&'#" ; else echo "# par_log2stderr="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR+x} ]; then echo "${VIASH_PAR_ERROR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error='&'#" ; else echo "# par_error="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMP_DIR+x} ]; then echo "${VIASH_PAR_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_temp_dir='&'#" ; else echo "# par_temp_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPRESSLEVEL+x} ]; then echo "${VIASH_PAR_COMPRESSLEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_compresslevel='&'#" ; else echo "# par_compresslevel="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT+x} ]; then echo "${VIASH_PAR_TIMEIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit='&'#" ; else echo "# par_timeit="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_NAME+x} ]; then echo "${VIASH_PAR_TIMEIT_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_name='&'#" ; else echo "# par_timeit_name="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_HEADER+x} ]; then echo "${VIASH_PAR_TIMEIT_HEADER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_header='&'#" ; else echo "# par_timeit_header="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e + +test_dir="\\${metal_executable}/test_data" + +unset_if_false=( + par_paired + par_in_sam + par_out_sam + par_spliced_is_unique + par_per_gene + par_per_contig + par_per_cell + par_no_sort_output + par_buffer_whole_contig + par_ignore_umi + par_subset + par_log2stderr + par_read_length +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +umi_tools dedup \\\\ + --stdin "\\$par_input" \\\\ + \\${par_in_sam:+--in-sam} \\\\ + --stdout "\\$par_output" \\\\ + \\${par_out_sam:+--out-sam} \\\\ + \\${par_paired:+--paired} \\\\ + \\${par_output_stats:+--output-stats "\\$par_output_stats"} \\\\ + \\${par_extract_umi_method:+--extract-umi-method "\\$par_extract_umi_method"} \\\\ + \\${par_umi_tag:+--umi-tag "\\$par_umi_tag"} \\\\ + \\${par_umi_separator:+--umi-separator "\\$par_umi_separator"} \\\\ + \\${par_umi_tag_split:+--umi-tag-split "\\$par_umi_tag_split"} \\\\ + \\${par_umi_tag_delimiter:+--umi-tag-delimiter "\\$par_umi_tag_delimiter"} \\\\ + \\${par_cell_tag:+--cell-tag "\\$par_cell_tag"} \\\\ + \\${par_cell_tag_split:+--cell-tag-split "\\$par_cell_tag_split"} \\\\ + \\${par_cell_tag_delimiter:+--cell-tag-delimiter "\\$par_cell_tag_delimiter"} \\\\ + \\${par_method:+--method "\\$par_method"} \\\\ + \\${par_edit_distance_threshold:+--edit-distance-threshold "\\$par_edit_distance_threshold"} \\\\ + \\${par_spliced_is_unique:+--spliced-is-unique} \\\\ + \\${par_soft_clip_threshold:+--soft-clip-threshold "\\$par_soft_clip_threshold"} \\\\ + \\${par_multimapping_detection_method:+--multimapping-detection-method "\\$par_multimapping_detection_method"} \\\\ + \\${par_read_length:+--read-length} \\\\ + \\${par_per_gene:+--per-gene} \\\\ + \\${par_gene_tag:+--gene-tag "\\$par_gene_tag"} \\\\ + \\${par_assigned_status_tag:+--assigned-status-tag "\\$par_assigned_status_tag"} \\\\ + \\${par_skip_tags_regex:+--skip-tags-regex "\\$par_skip_tags_regex"} \\\\ + \\${par_per_contig:+--per-contig} \\\\ + \\${par_gene_transcript_map:+--gene-transcript-map "\\$par_gene_transcript_map"} \\\\ + \\${par_per_cell:+--per-cell} \\\\ + \\${par_mapping_quality:+--mapping-quality "\\$par_mapping_quality"} \\\\ + \\${par_unmapped_reads:+--unmapped-reads "\\$par_unmapped_reads"} \\\\ + \\${par_chimeric_pairs:+--chimeric-pairs "\\$par_chimeric_pairs"} \\\\ + \\${par_unpaired_reads:+--unpaired-reads "\\$par_unpaired_reads"} \\\\ + \\${par_ignore_umi:+--ignore-umi} \\\\ + \\${par_subset:+--subset "\\$par_subset"} \\\\ + \\${par_chrom:+--chrom "\\$par_chrom"} \\\\ + \\${par_no_sort_output:+--no-sort-output} \\\\ + \\${par_buffer_whole_contig:+--buffer-whole-contig} \\\\ + \\${par_log:+-L "\\$par_log"} \\\\ + \\${par_log2stderr:+--log2stderr} \\\\ + \\${par_verbose:+-v "\\$par_verbose"} \\\\ + \\${par_error:+-E "\\$par_error"} \\\\ + \\${par_temp_dir:+--temp-dir "\\$par_temp_dir"} \\\\ + \\${par_compresslevel:+--compresslevel "\\$par_compresslevel"} \\\\ + \\${par_timeit:+--timeit "\\$par_timeit"} \\\\ + \\${par_timeit_name:+--timeit-name "\\$par_timeit_name"} \\\\ + \\${par_timeit_header:+--timeit-header "\\$par_timeit_header"} \\\\ + \\${par_random_seed:+--random-seed "\\$par_random_seed"} + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/umi_tools/umi_tools_dedup", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow.config new file mode 100644 index 0000000..fb672c9 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'umi_tools/umi_tools_dedup' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n' + author = 'Emma Rousseau' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json new file mode 100644 index 0000000..979543b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json @@ -0,0 +1,635 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "umi_tools_dedup", +"description": "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input BAM or SAM file", + "help_text": "Type: `file`, required. Input BAM or SAM file. Use --in_sam to specify SAM format." + + } + + + , + "in_sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, inputs are assumed to be in BAM format", + "help_text": "Type: `boolean_true`, default: `false`. By default, inputs are assumed to be in BAM format. Use this options to specify the use of SAM\nformat for input.\n" + , + "default":false + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`. BAM index", + "help_text": "Type: `file`. BAM index" + + } + + + , + "random_seed": { + "type": + "integer", + "description": "Type: `integer`. Random seed to initialize number generator with", + "help_text": "Type: `integer`. Random seed to initialize number generator with." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output`. Deduplicated BAM file", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Deduplicated BAM file." + , + "default":"$id.$key.output" + } + + + , + "out_sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, outputa are written in BAM format", + "help_text": "Type: `boolean_true`, default: `false`. By default, outputa are written in BAM format. Use this options to specify the use of SAM format\nfor output.\n" + , + "default":false + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. BAM is paired end - output both read pairs", + "help_text": "Type: `boolean_true`, default: `false`. BAM is paired end - output both read pairs. This will also force the use of the template length\nto determine reads with the same mapping coordinates.\n" + , + "default":false + } + + + , + "output_stats": { + "type": + "string", + "description": "Type: `string`. Generate files containing UMI based deduplication statistics files with this prefix in the file names", + "help_text": "Type: `string`. Generate files containing UMI based deduplication statistics files with this prefix in the file names.\n" + + } + + + , + "extract_umi_method": { + "type": + "string", + "description": "Type: `string`, example: `read_id`, choices: ``read_id`, `tag`, `umis``. Specify the method by which the barcodes were encoded in the read", + "help_text": "Type: `string`, example: `read_id`, choices: ``read_id`, `tag`, `umis``. Specify the method by which the barcodes were encoded in the read.\nThe options are:\n * read_id (default) \n * tag\n * umis\n", + "enum": ["read_id", "tag", "umis"] + + + } + + + , + "umi_tag": { + "type": + "string", + "description": "Type: `string`. The tag containing the UMI sequence", + "help_text": "Type: `string`. The tag containing the UMI sequence. This is only required if the extract_umi_method is set to tag.\n" + + } + + + , + "umi_separator": { + "type": + "string", + "description": "Type: `string`, example: `_`. The separator used to separate the UMI from the read sequence", + "help_text": "Type: `string`, example: `_`. The separator used to separate the UMI from the read sequence. This is only required if the\nextract_umi_method is set to id_read. Default: `_`.\n" + + } + + + , + "umi_tag_split": { + "type": + "string", + "description": "Type: `string`. Separate the UMI in tag by \u003cSPLIT\u003e and take the first element", + "help_text": "Type: `string`. Separate the UMI in tag by \u003cSPLIT\u003e and take the first element." + + } + + + , + "umi_tag_delimiter": { + "type": + "string", + "description": "Type: `string`. Separate the UMI in by \u003cDELIMITER\u003e and concatenate the elements", + "help_text": "Type: `string`. Separate the UMI in by \u003cDELIMITER\u003e and concatenate the elements." + + } + + + , + "cell_tag": { + "type": + "string", + "description": "Type: `string`. The tag containing the cell barcode sequence", + "help_text": "Type: `string`. The tag containing the cell barcode sequence. This is only required if the extract_umi_method\nis set to tag.\n" + + } + + + , + "cell_tag_split": { + "type": + "string", + "description": "Type: `string`. Separate the cell barcode in tag by \u003cSPLIT\u003e and take the first element", + "help_text": "Type: `string`. Separate the cell barcode in tag by \u003cSPLIT\u003e and take the first element." + + } + + + , + "cell_tag_delimiter": { + "type": + "string", + "description": "Type: `string`. Separate the cell barcode in by \u003cDELIMITER\u003e and concatenate the elements", + "help_text": "Type: `string`. Separate the cell barcode in by \u003cDELIMITER\u003e and concatenate the elements." + + } + + +} +}, + + + "grouping options" : { + "title": "Grouping Options", + "type": "object", + "description": "No description", + "properties": { + + + "method": { + "type": + "string", + "description": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. The method to use for grouping reads", + "help_text": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. The method to use for grouping reads. \nThe options are: \n * unique\n * percentile\n * cluster\n * adjacency\n * directional (default)\n", + "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] + + + } + + + , + "edit_distance_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. For the adjacency and cluster methods the threshold for the edit distance to connect two\nUMIs in the network can be increased", + "help_text": "Type: `integer`, example: `1`. For the adjacency and cluster methods the threshold for the edit distance to connect two\nUMIs in the network can be increased. The default value of 1 works best unless the UMI is\nvery long (\u003e14bp). Default: `1`.\n" + + } + + + , + "spliced_is_unique": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Causes two reads that start in the same position on the same strand and having the same UMI\nto be considered unique if one is spliced and the other is not", + "help_text": "Type: `boolean_true`, default: `false`. Causes two reads that start in the same position on the same strand and having the same UMI\nto be considered unique if one is spliced and the other is not. (Uses the \u0027N\u0027 cigar operation\nto test for splicing).\n" + , + "default":false + } + + + , + "soft_clip_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. Mappers that soft clip will sometimes do so rather than mapping a spliced read if there is only\na small overhang over the exon junction", + "help_text": "Type: `integer`, example: `4`. Mappers that soft clip will sometimes do so rather than mapping a spliced read if there is only\na small overhang over the exon junction. By setting this option, you can treat reads with at\nleast this many bases soft-clipped at the 3\u0027 end as spliced. Default: `4`.\n" + + } + + + , + "multimapping_detection_method": { + "type": + "string", + "description": "Type: `string`. If the sam/bam contains tags to identify multimapping reads, you can specify for use when selecting\nthe best read at a given loci", + "help_text": "Type: `string`. If the sam/bam contains tags to identify multimapping reads, you can specify for use when selecting\nthe best read at a given loci. Supported tags are `NH`, `X0` and `XT`. If not specified, the read\nwith the highest mapping quality will be selected.\n" + + } + + + , + "read_length": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the read length as a criteria when deduping, for e", + "help_text": "Type: `boolean_true`, default: `false`. Use the read length as a criteria when deduping, for e.g. sRNA-Seq." + , + "default":false + } + + +} +}, + + + "single-cell rna-seq options" : { + "title": "Single-cell RNA-Seq Options", + "type": "object", + "description": "No description", + "properties": { + + + "per_gene": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Reads will be grouped together if they have the same gene", + "help_text": "Type: `boolean_true`, default: `false`. Reads will be grouped together if they have the same gene. This is useful if your library prep\ngenerates PCR duplicates with non identical alignment positions such as CEL-Seq. Note this option\nis hardcoded to be on with the count command. I.e. counting is always performed per-gene. Must be\ncombined with either --gene_tag or --per_contig option.\n" + , + "default":false + } + + + , + "gene_tag": { + "type": + "string", + "description": "Type: `string`. Deduplicate per gene", + "help_text": "Type: `string`. Deduplicate per gene. The gene information is encoded in the bam read tag specified.\n" + + } + + + , + "assigned_status_tag": { + "type": + "string", + "description": "Type: `string`. BAM tag which describes whether a read is assigned to a gene", + "help_text": "Type: `string`. BAM tag which describes whether a read is assigned to a gene. Defaults to the same value as given\nfor --gene_tag.\n" + + } + + + , + "skip_tags_regex": { + "type": + "string", + "description": "Type: `string`. Use in conjunction with the --assigned_status_tag option to skip any reads where the tag matches\nthis regex", + "help_text": "Type: `string`. Use in conjunction with the --assigned_status_tag option to skip any reads where the tag matches\nthis regex. Default (\"^[__|Unassigned]\") matches anything which starts with \"__\" or \"Unassigned\".\n" + + } + + + , + "per_contig": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Deduplicate per contig (field 3 in BAM; RNAME)", + "help_text": "Type: `boolean_true`, default: `false`. Deduplicate per contig (field 3 in BAM; RNAME). All reads with the sam contig will be considered to\nhave the same alignment position. This is useful if you have aligned to a reference transcriptome\nwith one transcript per gene. If you have aligned to a transcriptome with more than one transcript\nper gene, you can supply a map between transcripts and gene using the --gene_transcript_map option.\n" + , + "default":false + } + + + , + "gene_transcript_map": { + "type": + "string", + "description": "Type: `file`. A file containing a mapping between gene names and transcript names", + "help_text": "Type: `file`. A file containing a mapping between gene names and transcript names. The file should be tab\nseparated with the gene name in the first column and the transcript name in the second column.\n" + + } + + + , + "per_cell": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Reads will only be grouped together if they have the same cell barcode", + "help_text": "Type: `boolean_true`, default: `false`. Reads will only be grouped together if they have the same cell barcode. Can be combined with\n--per_gene.\n" + , + "default":false + } + + +} +}, + + + "sam/bam options" : { + "title": "SAM/BAM Options", + "type": "object", + "description": "No description", + "properties": { + + + "mapping_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Minimium mapping quality (MAPQ) for a read to be retained", + "help_text": "Type: `integer`, example: `0`. Minimium mapping quality (MAPQ) for a read to be retained. Default: `0`.\n" + + } + + + , + "unmapped_reads": { + "type": + "string", + "description": "Type: `string`, example: `discard`. How unmapped reads should be handled", + "help_text": "Type: `string`, example: `discard`. How unmapped reads should be handled. \nThe options are:\n * \"discard\": Discard all unmapped reads. (default)\n * \"use\": If read2 is unmapped, deduplicate using read1 only. Requires --paired.\n * \"output\": Output unmapped reads/read pairs without UMI grouping/deduplication. Only available in umi_tools group.\n" + + } + + + , + "chimeric_pairs": { + "type": + "string", + "description": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How chimeric pairs should be handled", + "help_text": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How chimeric pairs should be handled. \nThe options are:\n * \"discard\": Discard all chimeric read pairs.\n * \"use\": Deduplicate using read1 only. (default)\n * \"output\": Output chimeric pairs without UMI grouping/deduplication. Only available in\n umi_tools group.\n", + "enum": ["discard", "use", "output"] + + + } + + + , + "unpaired_reads": { + "type": + "string", + "description": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How unpaired reads should be handled", + "help_text": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How unpaired reads should be handled. \nThe options are: \n * \"discard\": Discard all unmapped reads.\n * \"use\": If read2 is unmapped, deduplicate using read1 only. Requires --paired. (default)\n * \"output\": Output unmapped reads/read pairs without UMI grouping/deduplication. Only available\n in umi_tools group.\n", + "enum": ["discard", "use", "output"] + + + } + + + , + "ignore_umi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore the UMI and group reads using mapping coordinates only", + "help_text": "Type: `boolean_true`, default: `false`. Ignore the UMI and group reads using mapping coordinates only." + , + "default":false + } + + + , + "subset": { + "type": + "number", + "description": "Type: `double`. Only consider a fraction of the reads, chosen at random", + "help_text": "Type: `double`. Only consider a fraction of the reads, chosen at random. This is useful for doing saturation\nanalyses.\n" + + } + + + , + "chrom": { + "type": + "string", + "description": "Type: `string`. Only consider a single chromosome", + "help_text": "Type: `string`. Only consider a single chromosome. This is useful for debugging/testing purposes." + + } + + +} +}, + + + "group/dedup options" : { + "title": "Group/Dedup Options", + "type": "object", + "description": "No description", + "properties": { + + + "no_sort_output": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, output is sorted", + "help_text": "Type: `boolean_true`, default: `false`. By default, output is sorted. This involves the use of a temporary unsorted file (saved in\n--temp_dir). Use this option to turn off sorting.\n" + , + "default":false + } + + + , + "buffer_whole_contig": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Forces dedup to parse an entire contig before yielding any reads for deduplication", + "help_text": "Type: `boolean_true`, default: `false`. Forces dedup to parse an entire contig before yielding any reads for deduplication. This is the\nonly way to absolutely guarantee that all reads with the same start position are grouped together\nfor deduplication since dedup uses the start position of the read, not the alignment coordinate on\nwhich the reads are sorted. However, by default, dedup reads for another 1000bp before outputting\nread groups which will avoid any reads being missed with short read sequencing (\u003c1000bp).\n" + , + "default":false + } + + +} +}, + + + "common options" : { + "title": "Common Options", + "type": "object", + "description": "No description", + "properties": { + + + "log": { + "type": + "string", + "description": "Type: `file`. File with logging information", + "help_text": "Type: `file`. File with logging information." + + } + + + , + "log2stderr": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Send logging information to stderr", + "help_text": "Type: `boolean_true`, default: `false`. Send logging information to stderr." + , + "default":false + } + + + , + "verbose": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Log level", + "help_text": "Type: `integer`, example: `0`. Log level. The higher, the more output. Default: `0`.\n" + + } + + + , + "error": { + "type": + "string", + "description": "Type: `file`. File with error information", + "help_text": "Type: `file`. File with error information." + + } + + + , + "temp_dir": { + "type": + "string", + "description": "Type: `string`. Directory for temporary files", + "help_text": "Type: `string`. Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.\n" + + } + + + , + "compresslevel": { + "type": + "integer", + "description": "Type: `integer`, example: `6`. Level of Gzip compression to use", + "help_text": "Type: `integer`, example: `6`. Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default.\nDefault: `6`.\n" + + } + + + , + "timeit": { + "type": + "string", + "description": "Type: `file`. Store timing information in file", + "help_text": "Type: `file`. Store timing information in file." + + } + + + , + "timeit_name": { + "type": + "string", + "description": "Type: `string`, example: `all`. Name in timing file for this class of jobs", + "help_text": "Type: `string`, example: `all`. Name in timing file for this class of jobs. Default: `all`.\n" + + } + + + , + "timeit_header": { + "type": + "string", + "description": "Type: `string`. Add header for timing information", + "help_text": "Type: `string`. Add header for timing information." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/grouping options" + }, + + { + "$ref": "#/definitions/single-cell rna-seq options" + }, + + { + "$ref": "#/definitions/sam/bam options" + }, + + { + "$ref": "#/definitions/group/dedup options" + }, + + { + "$ref": "#/definitions/common options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml new file mode 100644 index 0000000..fd033e2 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml @@ -0,0 +1,475 @@ +name: "umi_tools_extract" +namespace: "umi_tools" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "File containing the input data." + info: null + example: + - "sample.fastq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read2_in" + description: "File containing the input data for the R2 reads (if paired). If\ + \ provided, a need to be provided." + info: null + example: + - "sample_R2.fastq" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--bc_pattern" + alternatives: + - "-p" + description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\ + \ first 6 nucleotides \nof the read are from the UMI.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--bc_pattern2" + description: "The UMI barcode pattern to use for read 2." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Output file for read 1." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read2_out" + description: "Output file for read 2." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--filtered_out" + description: "Write out reads not matching regex pattern or cell barcode whitelist\ + \ to this file.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--filtered_out2" + description: "Write out read pairs not matching regex pattern or cell barcode\ + \ whitelist to this file.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Extract Options" + arguments: + - type: "string" + name: "--extract_method" + description: "UMI pattern to use. Default: `string`.\n" + info: null + example: + - "string" + required: false + choices: + - "string" + - "regex" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--error_correct_cell" + description: "Error correct cell barcodes to the whitelist." + info: null + direction: "input" + - type: "file" + name: "--whitelist" + description: "Whitelist of accepted cell barcodes tab-separated format, where\ + \ column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated)\ + \ of other cell barcodes which should \nbe corrected to the barcode in column\ + \ 1. If the --error_correct_cell option is not used, this\ncolumn will be ignored.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--blacklist" + description: "BlackWhitelist of cell barcodes to discard." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--subset_reads" + description: "Only parse the first N reads." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--quality_filter_threshold" + description: "Remove reads where any UMI base quality score falls below this threshold." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quality_filter_mask" + description: "If a UMI base has a quality below this threshold, replace the base\ + \ with 'N'.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quality_encoding" + description: "Quality score encoding. Choose from:\n * phred33 [33-77]\n * phred64\ + \ [64-106]\n * solexa [59-106]\n" + info: null + required: false + choices: + - "phred33" + - "phred64" + - "solexa" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--reconcile_pairs" + description: "Allow read 2 infile to contain reads not in read 1 infile. This\ + \ enables support for upstream protocols\nwhere read one contains cell barcodes,\ + \ and the read pairs have been filtered and corrected without regard\nto the\ + \ read2.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--three_prime" + alternatives: + - "--3prime" + description: "By default the barcode is assumed to be on the 5' end of the read,\ + \ but use this option to sepecify that it is\non the 3' end instead. This option\ + \ only works with --extract_method=string since 3' encoding can be specified\n\ + explicitly with a regex, e.g `.*(?P.{5})$`.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--ignore_read_pair_suffixes" + description: "Ignore \"/1\" and \"/2\" read name suffixes. Note that this options\ + \ is required if the suffixes are not whitespace\nseparated from the rest of\ + \ the read name.\narguments:\n" + info: null + direction: "input" + - type: "string" + name: "--umi_separator" + description: "The character that separates the UMI in the read name. Most likely\ + \ a colon if you skipped the extraction with\nUMI-tools and used other software.\ + \ Default: `_`\n" + info: null + example: + - "_" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--grouping_method" + description: "Method to use to determine read groups by subsuming those with similar\ + \ UMIs. All methods start by identifying\nthe reads with the same mapping position,\ + \ but treat similar yet nonidentical UMIs differently. Default: `directional`\n" + info: null + example: + - "directional" + required: false + choices: + - "unique" + - "percentile" + - "cluster" + - "adjacency" + - "directional" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Common Options" + arguments: + - type: "file" + name: "--log" + description: "File with logging information." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--log2stderr" + description: "Send logging information to stderr." + info: null + direction: "input" + - type: "integer" + name: "--verbose" + description: "Log level. The higher, the more output." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--error" + description: "File with error information." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--temp_dir" + description: "Directory for temporary files. If not set, the bash environmental\ + \ variable TMPDIR is used.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--compresslevel" + description: "Level of Gzip compression to use. Default=6 matches GNU gzip rather\ + \ than python gzip default (which is 9).\nDefault `6`.\n" + info: null + example: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--timeit" + description: "Store timing information in file." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--timeit_name" + description: "Name in timing file for this class of jobs." + info: null + default: + - "all" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--timeit_header" + description: "Add header for timing information." + info: null + direction: "input" + - type: "integer" + name: "--random_seed" + description: "Random seed to initialize number generator with." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Flexible removal of UMI sequences from fastq reads.\nUMIs are removed\ + \ and appended to the read name. Any other barcode, for example a library barcode,\n\ + is left on the read. Can also filter reads by quality or against a whitelist.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "extract" +- "umi-tools" +- "umi" +- "fastq" +license: "MIT" +references: + doi: + - "10.1101/gr.209601.116" +links: + repository: "https://github.com/CGATOxford/UMI-tools" + homepage: "https://umi-tools.readthedocs.io/en/latest/" + documentation: "https://umi-tools.readthedocs.io/en/latest/reference/extract.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/umi_tools:1.1.4--py310h4b81fae_2" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/umi_tools/umi_tools_extract/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/umi_tools/umi_tools_extract" + executable: "target/nextflow/umi_tools/umi_tools_extract/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/main.nf new file mode 100644 index 0000000..93c1904 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/main.nf @@ -0,0 +1,4210 @@ +// umi_tools_extract v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "umi_tools_extract", + "namespace" : "umi_tools", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "File containing the input data.", + "example" : [ + "sample.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read2_in", + "description" : "File containing the input data for the R2 reads (if paired). If provided, a need to be provided.", + "example" : [ + "sample_R2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--bc_pattern", + "alternatives" : [ + "-p" + ], + "description" : "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides \nof the read are from the UMI.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--bc_pattern2", + "description" : "The UMI barcode pattern to use for read 2.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output file for read 1.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read2_out", + "description" : "Output file for read 2.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--filtered_out", + "description" : "Write out reads not matching regex pattern or cell barcode whitelist to this file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--filtered_out2", + "description" : "Write out read pairs not matching regex pattern or cell barcode whitelist to this file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Extract Options", + "arguments" : [ + { + "type" : "string", + "name" : "--extract_method", + "description" : "UMI pattern to use. Default: `string`.\n", + "example" : [ + "string" + ], + "required" : false, + "choices" : [ + "string", + "regex" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--error_correct_cell", + "description" : "Error correct cell barcodes to the whitelist.", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--whitelist", + "description" : "Whitelist of accepted cell barcodes tab-separated format, where column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated) of other cell barcodes which should \nbe corrected to the barcode in column 1. If the --error_correct_cell option is not used, this\ncolumn will be ignored.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--blacklist", + "description" : "BlackWhitelist of cell barcodes to discard.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--subset_reads", + "description" : "Only parse the first N reads.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--quality_filter_threshold", + "description" : "Remove reads where any UMI base quality score falls below this threshold.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quality_filter_mask", + "description" : "If a UMI base has a quality below this threshold, replace the base with 'N'.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quality_encoding", + "description" : "Quality score encoding. Choose from:\n * phred33 [33-77]\n * phred64 [64-106]\n * solexa [59-106]\n", + "required" : false, + "choices" : [ + "phred33", + "phred64", + "solexa" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--reconcile_pairs", + "description" : "Allow read 2 infile to contain reads not in read 1 infile. This enables support for upstream protocols\nwhere read one contains cell barcodes, and the read pairs have been filtered and corrected without regard\nto the read2.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--three_prime", + "alternatives" : [ + "--3prime" + ], + "description" : "By default the barcode is assumed to be on the 5' end of the read, but use this option to sepecify that it is\non the 3' end instead. This option only works with --extract_method=string since 3' encoding can be specified\nexplicitly with a regex, e.g `.*(?P.{5})$`.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--ignore_read_pair_suffixes", + "description" : "Ignore \\"/1\\" and \\"/2\\" read name suffixes. Note that this options is required if the suffixes are not whitespace\nseparated from the rest of the read name.\narguments:\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--umi_separator", + "description" : "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with\nUMI-tools and used other software. Default: `_`\n", + "example" : [ + "_" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--grouping_method", + "description" : "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying\nthe reads with the same mapping position, but treat similar yet nonidentical UMIs differently. Default: `directional`\n", + "example" : [ + "directional" + ], + "required" : false, + "choices" : [ + "unique", + "percentile", + "cluster", + "adjacency", + "directional" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Common Options", + "arguments" : [ + { + "type" : "file", + "name" : "--log", + "description" : "File with logging information.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--log2stderr", + "description" : "Send logging information to stderr.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--verbose", + "description" : "Log level. The higher, the more output.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--error", + "description" : "File with error information.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--temp_dir", + "description" : "Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--compresslevel", + "description" : "Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default (which is 9).\nDefault `6`.\n", + "example" : [ + 6 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--timeit", + "description" : "Store timing information in file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--timeit_name", + "description" : "Name in timing file for this class of jobs.", + "default" : [ + "all" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--timeit_header", + "description" : "Add header for timing information.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--random_seed", + "description" : "Random seed to initialize number generator with.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Flexible removal of UMI sequences from fastq reads.\nUMIs are removed and appended to the read name. Any other barcode, for example a library barcode,\nis left on the read. Can also filter reads by quality or against a whitelist.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "extract", + "umi-tools", + "umi", + "fastq" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1101/gr.209601.116" + ] + }, + "links" : { + "repository" : "https://github.com/CGATOxford/UMI-tools", + "homepage" : "https://umi-tools.readthedocs.io/en/latest/", + "documentation" : "https://umi-tools.readthedocs.io/en/latest/reference/extract.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/umi_tools:1.1.4--py310h4b81fae_2", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/umi_tools/umi_tools_extract/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/umi_tools/umi_tools_extract", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_READ2_IN+x} ]; then echo "${VIASH_PAR_READ2_IN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read2_in='&'#" ; else echo "# par_read2_in="; fi ) +$( if [ ! -z ${VIASH_PAR_BC_PATTERN+x} ]; then echo "${VIASH_PAR_BC_PATTERN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bc_pattern='&'#" ; else echo "# par_bc_pattern="; fi ) +$( if [ ! -z ${VIASH_PAR_BC_PATTERN2+x} ]; then echo "${VIASH_PAR_BC_PATTERN2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bc_pattern2='&'#" ; else echo "# par_bc_pattern2="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_READ2_OUT+x} ]; then echo "${VIASH_PAR_READ2_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read2_out='&'#" ; else echo "# par_read2_out="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTERED_OUT+x} ]; then echo "${VIASH_PAR_FILTERED_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filtered_out='&'#" ; else echo "# par_filtered_out="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTERED_OUT2+x} ]; then echo "${VIASH_PAR_FILTERED_OUT2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filtered_out2='&'#" ; else echo "# par_filtered_out2="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRACT_METHOD+x} ]; then echo "${VIASH_PAR_EXTRACT_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extract_method='&'#" ; else echo "# par_extract_method="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR_CORRECT_CELL+x} ]; then echo "${VIASH_PAR_ERROR_CORRECT_CELL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error_correct_cell='&'#" ; else echo "# par_error_correct_cell="; fi ) +$( if [ ! -z ${VIASH_PAR_WHITELIST+x} ]; then echo "${VIASH_PAR_WHITELIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_whitelist='&'#" ; else echo "# par_whitelist="; fi ) +$( if [ ! -z ${VIASH_PAR_BLACKLIST+x} ]; then echo "${VIASH_PAR_BLACKLIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_blacklist='&'#" ; else echo "# par_blacklist="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBSET_READS+x} ]; then echo "${VIASH_PAR_SUBSET_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subset_reads='&'#" ; else echo "# par_subset_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY_FILTER_THRESHOLD+x} ]; then echo "${VIASH_PAR_QUALITY_FILTER_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality_filter_threshold='&'#" ; else echo "# par_quality_filter_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY_FILTER_MASK+x} ]; then echo "${VIASH_PAR_QUALITY_FILTER_MASK}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality_filter_mask='&'#" ; else echo "# par_quality_filter_mask="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY_ENCODING+x} ]; then echo "${VIASH_PAR_QUALITY_ENCODING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality_encoding='&'#" ; else echo "# par_quality_encoding="; fi ) +$( if [ ! -z ${VIASH_PAR_RECONCILE_PAIRS+x} ]; then echo "${VIASH_PAR_RECONCILE_PAIRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reconcile_pairs='&'#" ; else echo "# par_reconcile_pairs="; fi ) +$( if [ ! -z ${VIASH_PAR_THREE_PRIME+x} ]; then echo "${VIASH_PAR_THREE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime='&'#" ; else echo "# par_three_prime="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_READ_PAIR_SUFFIXES+x} ]; then echo "${VIASH_PAR_IGNORE_READ_PAIR_SUFFIXES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_read_pair_suffixes='&'#" ; else echo "# par_ignore_read_pair_suffixes="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_SEPARATOR+x} ]; then echo "${VIASH_PAR_UMI_SEPARATOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_separator='&'#" ; else echo "# par_umi_separator="; fi ) +$( if [ ! -z ${VIASH_PAR_GROUPING_METHOD+x} ]; then echo "${VIASH_PAR_GROUPING_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_grouping_method='&'#" ; else echo "# par_grouping_method="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG2STDERR+x} ]; then echo "${VIASH_PAR_LOG2STDERR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log2stderr='&'#" ; else echo "# par_log2stderr="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR+x} ]; then echo "${VIASH_PAR_ERROR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error='&'#" ; else echo "# par_error="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMP_DIR+x} ]; then echo "${VIASH_PAR_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_temp_dir='&'#" ; else echo "# par_temp_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPRESSLEVEL+x} ]; then echo "${VIASH_PAR_COMPRESSLEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_compresslevel='&'#" ; else echo "# par_compresslevel="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT+x} ]; then echo "${VIASH_PAR_TIMEIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit='&'#" ; else echo "# par_timeit="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_NAME+x} ]; then echo "${VIASH_PAR_TIMEIT_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_name='&'#" ; else echo "# par_timeit_name="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_HEADER+x} ]; then echo "${VIASH_PAR_TIMEIT_HEADER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_header='&'#" ; else echo "# par_timeit_header="; fi ) +$( if [ ! -z ${VIASH_PAR_RANDOM_SEED+x} ]; then echo "${VIASH_PAR_RANDOM_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_random_seed='&'#" ; else echo "# par_random_seed="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -exo pipefail + +unset_if_false=( + par_error_correct_cell + par_reconcile_pairs + par_three_prime + par_ignore_read_pair_suffixes + par_timeit_header + par_log2stderr +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + + +# Check if we have the correct number of input files and patterns for paired-end or single-end reads + +# For paired-end rends, check that we have two read files, two patterns +# Check for paired-end inputs +if [ -n "\\$par_input" ] && [ -n "\\$par_read2_in" ]; then + # Paired-end checks: Ensure both UMI patterns are provided + if [ -z "\\$par_bc_pattern" ] || [ -z "\\$par_bc_pattern2" ]; then + echo "Paired end input requires two UMI patterns." + exit 1 + fi +elif [ -n "\\$par_input" ]; then + # Single-end checks: Ensure no second read or UMI pattern for the second read is provided + if [ -n "\\$par_bc_pattern2" ]; then + echo "Single end input requires only one read file and one UMI pattern." + exit 1 + fi + # Check that discard_read is not set or set to 0 for single-end reads + if [ -n "\\$par_umi_discard_read" ] && [ "\\$par_umi_discard_read" != 0 ]; then + echo "umi_discard_read is only valid when processing paired end reads." + exit 1 + fi +else + # No inputs provided + echo "No input files provided." + exit 1 +fi + + + + +umi_tools extract \\\\ + -I "\\$par_input" \\\\ + \\${par_read2_in:+ --read2-in "\\$par_read2_in"} \\\\ + -S "\\$par_output" \\\\ + \\${par_read2_out:+--read2-out "\\$par_read2_out"} \\\\ + \\${par_extract_method:+--extract-method "\\$par_extract_method"} \\\\ + --bc-pattern "\\$par_bc_pattern" \\\\ + \\${par_bc_pattern2:+ --bc-pattern2 "\\$par_bc_pattern2"} \\\\ + \\${par_umi_separator:+--umi-separator "\\$par_umi_separator"} \\\\ + \\${par_output_stats:+--output-stats "\\$par_output_stats"} \\\\ + \\${par_error_correct_cell:+--error-correct-cell} \\\\ + \\${par_whitelist:+--whitelist "\\$par_whitelist"} \\\\ + \\${par_blacklist:+--blacklist "\\$par_blacklist"} \\\\ + \\${par_subset_reads:+--subset-reads "\\$par_subset_reads"} \\\\ + \\${par_quality_filter_threshold:+--quality-filter-threshold "\\$par_quality_filter_threshold"} \\\\ + \\${par_quality_filter_mask:+--quality-filter-mask "\\$par_quality_filter_mask"} \\\\ + \\${par_quality_encoding:+--quality-encoding "\\$par_quality_encoding"} \\\\ + \\${par_reconcile_pairs:+--reconcile-pairs} \\\\ + \\${par_three_prime:+--3prime} \\\\ + \\${par_filtered_out:+--filtered-out "\\$par_filtered_out"} \\\\ + \\${par_filtered_out2:+--filtered-out2 "\\$par_filtered_out2"} \\\\ + \\${par_ignore_read_pair_suffixes:+--ignore-read-pair-suffixes} \\\\ + \\${par_random_seed:+--random-seed "\\$par_random_seed"} \\\\ + \\${par_temp_dir:+--temp-dir "\\$par_temp_dir"} \\\\ + \\${par_compresslevel:+--compresslevel "\\$par_compresslevel"} \\\\ + \\${par_timeit:+--timeit "\\$par_timeit"} \\\\ + \\${par_timeit_name:+--timeit-name "\\$par_timeit_name"} \\\\ + \\${par_timeit_header:+--timeit-header} \\\\ + \\${par_log:+--log "\\$par_log"} \\\\ + \\${par_log2stderr:+--log2stderr} \\\\ + \\${par_verbose:+--verbose "\\$par_verbose"} \\\\ + \\${par_error:+--error "\\$par_error"} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/umi_tools/umi_tools_extract", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow.config new file mode 100644 index 0000000..ee73e5a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'umi_tools/umi_tools_extract' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Flexible removal of UMI sequences from fastq reads.\nUMIs are removed and appended to the read name. Any other barcode, for example a library barcode,\nis left on the read. Can also filter reads by quality or against a whitelist.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json new file mode 100644 index 0000000..e8f0501 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json @@ -0,0 +1,430 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "umi_tools_extract", +"description": "Flexible removal of UMI sequences from fastq reads.\nUMIs are removed and appended to the read name. Any other barcode, for example a library barcode,\nis left on the read. Can also filter reads by quality or against a whitelist.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `sample.fastq`. File containing the input data", + "help_text": "Type: `file`, required, example: `sample.fastq`. File containing the input data." + + } + + + , + "read2_in": { + "type": + "string", + "description": "Type: `file`, example: `sample_R2.fastq`. File containing the input data for the R2 reads (if paired)", + "help_text": "Type: `file`, example: `sample_R2.fastq`. File containing the input data for the R2 reads (if paired). If provided, a \u003clist of other required arguments\u003e need to be provided." + + } + + + , + "bc_pattern": { + "type": + "string", + "description": "Type: `string`. The UMI barcode pattern to use e", + "help_text": "Type: `string`. The UMI barcode pattern to use e.g. \u0027NNNNNN\u0027 indicates that the first 6 nucleotides \nof the read are from the UMI.\n" + + } + + + , + "bc_pattern2": { + "type": + "string", + "description": "Type: `string`. The UMI barcode pattern to use for read 2", + "help_text": "Type: `string`. The UMI barcode pattern to use for read 2." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output`. Output file for read 1", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Output file for read 1." + , + "default":"$id.$key.output" + } + + + , + "read2_out": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.read2_out`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.read2_out`. Output file for read 2." + , + "default":"$id.$key.read2_out" + } + + + , + "filtered_out": { + "type": + "string", + "description": "Type: `file`. Write out reads not matching regex pattern or cell barcode whitelist to this file", + "help_text": "Type: `file`. Write out reads not matching regex pattern or cell barcode whitelist to this file.\n" + + } + + + , + "filtered_out2": { + "type": + "string", + "description": "Type: `file`. Write out read pairs not matching regex pattern or cell barcode whitelist to this file", + "help_text": "Type: `file`. Write out read pairs not matching regex pattern or cell barcode whitelist to this file.\n" + + } + + +} +}, + + + "extract options" : { + "title": "Extract Options", + "type": "object", + "description": "No description", + "properties": { + + + "extract_method": { + "type": + "string", + "description": "Type: `string`, example: `string`, choices: ``string`, `regex``. UMI pattern to use", + "help_text": "Type: `string`, example: `string`, choices: ``string`, `regex``. UMI pattern to use. Default: `string`.\n", + "enum": ["string", "regex"] + + + } + + + , + "error_correct_cell": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Error correct cell barcodes to the whitelist", + "help_text": "Type: `boolean_true`, default: `false`. Error correct cell barcodes to the whitelist." + , + "default":false + } + + + , + "whitelist": { + "type": + "string", + "description": "Type: `file`. Whitelist of accepted cell barcodes tab-separated format, where column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated) of other cell barcodes which should \nbe corrected to the barcode in column 1", + "help_text": "Type: `file`. Whitelist of accepted cell barcodes tab-separated format, where column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated) of other cell barcodes which should \nbe corrected to the barcode in column 1. If the --error_correct_cell option is not used, this\ncolumn will be ignored.\n" + + } + + + , + "blacklist": { + "type": + "string", + "description": "Type: `file`. BlackWhitelist of cell barcodes to discard", + "help_text": "Type: `file`. BlackWhitelist of cell barcodes to discard." + + } + + + , + "subset_reads": { + "type": + "integer", + "description": "Type: `integer`. Only parse the first N reads", + "help_text": "Type: `integer`. Only parse the first N reads." + + } + + + , + "quality_filter_threshold": { + "type": + "integer", + "description": "Type: `integer`. Remove reads where any UMI base quality score falls below this threshold", + "help_text": "Type: `integer`. Remove reads where any UMI base quality score falls below this threshold." + + } + + + , + "quality_filter_mask": { + "type": + "string", + "description": "Type: `string`. If a UMI base has a quality below this threshold, replace the base with \u0027N\u0027", + "help_text": "Type: `string`. If a UMI base has a quality below this threshold, replace the base with \u0027N\u0027.\n" + + } + + + , + "quality_encoding": { + "type": + "string", + "description": "Type: `string`, choices: ``phred33`, `phred64`, `solexa``. Quality score encoding", + "help_text": "Type: `string`, choices: ``phred33`, `phred64`, `solexa``. Quality score encoding. Choose from:\n * phred33 [33-77]\n * phred64 [64-106]\n * solexa [59-106]\n", + "enum": ["phred33", "phred64", "solexa"] + + + } + + + , + "reconcile_pairs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Allow read 2 infile to contain reads not in read 1 infile", + "help_text": "Type: `boolean_true`, default: `false`. Allow read 2 infile to contain reads not in read 1 infile. This enables support for upstream protocols\nwhere read one contains cell barcodes, and the read pairs have been filtered and corrected without regard\nto the read2.\n" + , + "default":false + } + + + , + "three_prime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default the barcode is assumed to be on the 5\u0027 end of the read, but use this option to sepecify that it is\non the 3\u0027 end instead", + "help_text": "Type: `boolean_true`, default: `false`. By default the barcode is assumed to be on the 5\u0027 end of the read, but use this option to sepecify that it is\non the 3\u0027 end instead. This option only works with --extract_method=string since 3\u0027 encoding can be specified\nexplicitly with a regex, e.g `.*(?P\u003cumi_1\u003e.{5})$`.\n" + , + "default":false + } + + + , + "ignore_read_pair_suffixes": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore \"/1\" and \"/2\" read name suffixes", + "help_text": "Type: `boolean_true`, default: `false`. Ignore \"/1\" and \"/2\" read name suffixes. Note that this options is required if the suffixes are not whitespace\nseparated from the rest of the read name.\narguments:\n" + , + "default":false + } + + + , + "umi_separator": { + "type": + "string", + "description": "Type: `string`, example: `_`. The character that separates the UMI in the read name", + "help_text": "Type: `string`, example: `_`. The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with\nUMI-tools and used other software. Default: `_`\n" + + } + + + , + "grouping_method": { + "type": + "string", + "description": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs", + "help_text": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying\nthe reads with the same mapping position, but treat similar yet nonidentical UMIs differently. Default: `directional`\n", + "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] + + + } + + +} +}, + + + "common options" : { + "title": "Common Options", + "type": "object", + "description": "No description", + "properties": { + + + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log`. File with logging information", + "help_text": "Type: `file`, default: `$id.$key.log`. File with logging information." + , + "default":"$id.$key.log" + } + + + , + "log2stderr": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Send logging information to stderr", + "help_text": "Type: `boolean_true`, default: `false`. Send logging information to stderr." + , + "default":false + } + + + , + "verbose": { + "type": + "integer", + "description": "Type: `integer`. Log level", + "help_text": "Type: `integer`. Log level. The higher, the more output." + + } + + + , + "error": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.error`. File with error information", + "help_text": "Type: `file`, default: `$id.$key.error`. File with error information." + , + "default":"$id.$key.error" + } + + + , + "temp_dir": { + "type": + "string", + "description": "Type: `string`. Directory for temporary files", + "help_text": "Type: `string`. Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.\n" + + } + + + , + "compresslevel": { + "type": + "integer", + "description": "Type: `integer`, example: `6`. Level of Gzip compression to use", + "help_text": "Type: `integer`, example: `6`. Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default (which is 9).\nDefault `6`.\n" + + } + + + , + "timeit": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.timeit`. Store timing information in file", + "help_text": "Type: `file`, default: `$id.$key.timeit`. Store timing information in file." + , + "default":"$id.$key.timeit" + } + + + , + "timeit_name": { + "type": + "string", + "description": "Type: `string`, default: `all`. Name in timing file for this class of jobs", + "help_text": "Type: `string`, default: `all`. Name in timing file for this class of jobs." + , + "default":"all" + } + + + , + "timeit_header": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add header for timing information", + "help_text": "Type: `boolean_true`, default: `false`. Add header for timing information." + , + "default":false + } + + + , + "random_seed": { + "type": + "integer", + "description": "Type: `integer`. Random seed to initialize number generator with", + "help_text": "Type: `integer`. Random seed to initialize number generator with." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/extract options" + }, + + { + "$ref": "#/definitions/common options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml new file mode 100644 index 0000000..b7c0a86 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml @@ -0,0 +1,297 @@ +name: "umi_tools_prepareforrsem" +namespace: "umi_tools" +version: "v0.3.1" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-I" + - "--stdin" + info: null + example: + - "$id.transcriptome.bam" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-S" + - "--stdout" + info: null + example: + - "$id.transcriptome_sorted.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--log" + alternatives: + - "-L" + description: "File with logging information [default = stdout]." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--error" + alternatives: + - "-E" + description: "File with error information [default = stderr]." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--log2stderr" + description: "Send logging information to stderr [default = False]." + info: null + direction: "input" + - type: "string" + name: "--temp_dir" + description: "Directory for temporary files. If not set, the bash environmental\ + \ variable \nTMPDIR is used.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--compresslevel" + description: "Level of Gzip compression to use. Default (6) matchesGNU gzip rather\ + \ than python \ngzip default (which is 9).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "string" + name: "--tags" + description: "Comma-seperated list of tags to transfer from read1 to read2 (Default:\ + \ 'UG,BX')\n" + info: null + example: + - "UG,BX" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sam" + description: "Input and output SAM rather than BAM." + info: null + direction: "input" + - type: "string" + name: "--timeit" + description: "Store timeing information in file [none].\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--timeit_name" + description: "Name in timing file for this class of jobs [all].\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--timeit_header" + description: "Add header for timing information [none]." + info: null + direction: "input" + - type: "integer" + name: "--verbose" + alternatives: + - "-v" + description: "Loglevel [1]. The higher, the more output.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--random_seed" + description: "Random seed to initialize number generator with [none].\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "prepare-for-rsem.py" +description: "Make the output from umi-tools dedup or group compatible with RSEM" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "umi_tools" +- "rsem" +- "bam" +- "sam" +license: "MIT" +references: + doi: + - "10.1101/gr.209601.116" +links: + repository: "https://github.com/CGATOxford/UMI-tools" + homepage: "https://umi-tools.readthedocs.io/en/latest/" + documentation: "https://umi-tools.readthedocs.io/en/latest/reference/extract.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/umi_tools:1.1.5--py38h0020b31_3" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/umi_tools/umi_tools_prepareforrsem/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/umi_tools/umi_tools_prepareforrsem" + executable: "target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf new file mode 100644 index 0000000..a809880 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf @@ -0,0 +1,3963 @@ +// umi_tools_prepareforrsem v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "umi_tools_prepareforrsem", + "namespace" : "umi_tools", + "version" : "v0.3.1", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-I", + "--stdin" + ], + "example" : [ + "$id.transcriptome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-S", + "--stdout" + ], + "example" : [ + "$id.transcriptome_sorted.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--log", + "alternatives" : [ + "-L" + ], + "description" : "File with logging information [default = stdout].", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--error", + "alternatives" : [ + "-E" + ], + "description" : "File with error information [default = stderr].", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--log2stderr", + "description" : "Send logging information to stderr [default = False].", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--temp_dir", + "description" : "Directory for temporary files. If not set, the bash environmental variable \nTMPDIR is used.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--compresslevel", + "description" : "Level of Gzip compression to use. Default (6) matchesGNU gzip rather than python \ngzip default (which is 9).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "string", + "name" : "--tags", + "description" : "Comma-seperated list of tags to transfer from read1 to read2 (Default: 'UG,BX')\n", + "example" : [ + "UG,BX" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sam", + "description" : "Input and output SAM rather than BAM.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--timeit", + "description" : "Store timeing information in file [none].\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--timeit_name", + "description" : "Name in timing file for this class of jobs [all].\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--timeit_header", + "description" : "Add header for timing information [none].", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--verbose", + "alternatives" : [ + "-v" + ], + "description" : "Loglevel [1]. The higher, the more output.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--random_seed", + "description" : "Random seed to initialize number generator with [none].\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "prepare-for-rsem.py" + } + ], + "description" : "Make the output from umi-tools dedup or group compatible with RSEM", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "umi_tools", + "rsem", + "bam", + "sam" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1101/gr.209601.116" + ] + }, + "links" : { + "repository" : "https://github.com/CGATOxford/UMI-tools", + "homepage" : "https://umi-tools.readthedocs.io/en/latest/", + "documentation" : "https://umi-tools.readthedocs.io/en/latest/reference/extract.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/umi_tools:1.1.5--py38h0020b31_3", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/umi_tools/umi_tools_prepareforrsem/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/umi_tools/umi_tools_prepareforrsem", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR+x} ]; then echo "${VIASH_PAR_ERROR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error='&'#" ; else echo "# par_error="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG2STDERR+x} ]; then echo "${VIASH_PAR_LOG2STDERR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log2stderr='&'#" ; else echo "# par_log2stderr="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMP_DIR+x} ]; then echo "${VIASH_PAR_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_temp_dir='&'#" ; else echo "# par_temp_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPRESSLEVEL+x} ]; then echo "${VIASH_PAR_COMPRESSLEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_compresslevel='&'#" ; else echo "# par_compresslevel="; fi ) +$( if [ ! -z ${VIASH_PAR_TAGS+x} ]; then echo "${VIASH_PAR_TAGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tags='&'#" ; else echo "# par_tags="; fi ) +$( if [ ! -z ${VIASH_PAR_SAM+x} ]; then echo "${VIASH_PAR_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam='&'#" ; else echo "# par_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT+x} ]; then echo "${VIASH_PAR_TIMEIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit='&'#" ; else echo "# par_timeit="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_NAME+x} ]; then echo "${VIASH_PAR_TIMEIT_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_name='&'#" ; else echo "# par_timeit_name="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_HEADER+x} ]; then echo "${VIASH_PAR_TIMEIT_HEADER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_header='&'#" ; else echo "# par_timeit_header="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_RANDOM_SEED+x} ]; then echo "${VIASH_PAR_RANDOM_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_random_seed='&'#" ; else echo "# par_random_seed="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +unset_if_false=( + par_sam + par_error + par_log2stderr + par_timeit_header ) + +for var in "\\${unset_if_false[@]}"; do + test_val="\\${!var}" + [[ "\\$test_val" == "false" ]] && unset \\$var +done + +umi_tools prepare-for-rsem \\\\ + \\${par_log:+--log "\\${par_log}"} \\\\ + \\${par_tags:+--tags "\\${par_tags}"} \\\\ + \\${par_sam:+--sam} \\\\ + --stdin="\\${par_input}" \\\\ + \\${par_output:+--stdout "\\${par_output}"} \\\\ + \\${par_error:+--error "\\${par_error}"} \\\\ + \\${par_temp_dir:+--temp-dir "\\${par_temp_dir}"} \\\\ + \\${par_log2stderr:+--log2stderr} \\\\ + \\${par_verbose:+--verbose "\\${par_verbose}"} \\\\ + \\${par_random_seed:+--random-seed "\\${par_random_seed}"} \\\\ + \\${par_compresslevel:+--compresslevel "\\${par_compresslevel}"} + \\${par_timeit:+--timeit "\\${par_timeit}"} \\\\ + \\${par_timeit_name:+--timeit-name "\\${par_timeit_name}"} \\\\ + \\${par_timeit_header:+--timeit-header} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/umi_tools/umi_tools_prepareforrsem", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow.config new file mode 100644 index 0000000..e9bf8e6 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'umi_tools/umi_tools_prepareforrsem' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Make the output from umi-tools dedup or group compatible with RSEM' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json new file mode 100644 index 0000000..98b77e2 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json @@ -0,0 +1,234 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "umi_tools_prepareforrsem", +"description": "Make the output from umi-tools dedup or group compatible with RSEM", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `$id.transcriptome.bam`. ", + "help_text": "Type: `file`, required, example: `$id.transcriptome.bam`. " + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.bam`, example: `$id.transcriptome_sorted.bam`. ", + "help_text": "Type: `file`, default: `$id.$key.output.bam`, example: `$id.transcriptome_sorted.bam`. " + , + "default":"$id.$key.output.bam" + } + + + , + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log`. File with logging information [default = stdout]", + "help_text": "Type: `file`, default: `$id.$key.log`. File with logging information [default = stdout]." + , + "default":"$id.$key.log" + } + + + , + "error": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.error`. File with error information [default = stderr]", + "help_text": "Type: `file`, default: `$id.$key.error`. File with error information [default = stderr]." + , + "default":"$id.$key.error" + } + + + , + "log2stderr": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Send logging information to stderr [default = False]", + "help_text": "Type: `boolean_true`, default: `false`. Send logging information to stderr [default = False]." + , + "default":false + } + + + , + "temp_dir": { + "type": + "string", + "description": "Type: `string`. Directory for temporary files", + "help_text": "Type: `string`. Directory for temporary files. If not set, the bash environmental variable \nTMPDIR is used.\n" + + } + + + , + "compresslevel": { + "type": + "integer", + "description": "Type: `integer`. Level of Gzip compression to use", + "help_text": "Type: `integer`. Level of Gzip compression to use. Default (6) matchesGNU gzip rather than python \ngzip default (which is 9).\n" + + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "tags": { + "type": + "string", + "description": "Type: `string`, example: `UG,BX`. Comma-seperated list of tags to transfer from read1 to read2 (Default: \u0027UG,BX\u0027)\n", + "help_text": "Type: `string`, example: `UG,BX`. Comma-seperated list of tags to transfer from read1 to read2 (Default: \u0027UG,BX\u0027)\n" + + } + + + , + "sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input and output SAM rather than BAM", + "help_text": "Type: `boolean_true`, default: `false`. Input and output SAM rather than BAM." + , + "default":false + } + + + , + "timeit": { + "type": + "string", + "description": "Type: `string`. Store timeing information in file [none]", + "help_text": "Type: `string`. Store timeing information in file [none].\n" + + } + + + , + "timeit_name": { + "type": + "string", + "description": "Type: `string`. Name in timing file for this class of jobs [all]", + "help_text": "Type: `string`. Name in timing file for this class of jobs [all].\n" + + } + + + , + "timeit_header": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add header for timing information [none]", + "help_text": "Type: `boolean_true`, default: `false`. Add header for timing information [none]." + , + "default":false + } + + + , + "verbose": { + "type": + "integer", + "description": "Type: `integer`. Loglevel [1]", + "help_text": "Type: `integer`. Loglevel [1]. The higher, the more output.\n" + + } + + + , + "random_seed": { + "type": + "integer", + "description": "Type: `integer`. Random seed to initialize number generator with [none]", + "help_text": "Type: `integer`. Random seed to initialize number generator with [none].\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/prepare-for-rsem.py b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/prepare-for-rsem.py new file mode 100644 index 0000000..b53d30a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/prepare-for-rsem.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 + +""" +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Credits +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This script is a clone of the "prepare-for-rsem.py" script written by +Ian Sudbury, Tom Smith and other contributors to the UMI-tools package: +https://github.com/CGATOxford/UMI-tools + +It has been included here to address problems encountered with +Salmon quant and RSEM as discussed in the issue below: +https://github.com/CGATOxford/UMI-tools/issues/465 + +When the "umi_tools prepare-for-rsem" command becomes available in an official +UMI-tools release this script will be replaced and deprecated. + +Commit: +https://github.com/CGATOxford/UMI-tools/blob/bf8608d6a172c5ca0dcf33c126b4e23429177a72/umi_tools/prepare-for-rsem.py + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +prepare_for_rsem - make the output from dedup or group compatible with RSEM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The SAM format specification states that the mnext and mpos fields should point +to the primary alignment of a read's mate. However, not all aligners adhere to +this standard. In addition, the RSEM software requires that the mate of a read1 +appears directly after it in its input BAM. This requires that there is exactly +one read1 alignment for every read2 and vice versa. + +In general (except in a few edge cases) UMI tools outputs only the read2 to that +corresponds to the read specified in the mnext and mpos positions of a selected +read1, and only outputs this read once, even if multiple read1s point to it. +This makes UMI-tools outputs incompatible with RSEM. This script takes the output +from dedup or groups and ensures that each read1 has exactly one read2 (and vice +versa), that read2 always appears directly after read1,and that pairs point to +each other (note this is technically not valid SAM format). Copy any specified +tags from read1 to read2 if they are present (by default, UG and BX, the unique +group and correct UMI tags added by _group_) + +Input must to name sorted. + + +https://raw.githubusercontent.com/CGATOxford/UMI-tools/master/LICENSE + +""" + +from umi_tools import Utilities as U +from collections import defaultdict, Counter +import pysam +import sys + + +usage = """ +prepare_for_rsem - make output from dedup or group compatible with RSEM + +Usage: umi_tools prepare_for_rsem [OPTIONS] [--stdin=IN_BAM] [--stdout=OUT_BAM] + + note: If --stdout is omited, standard out is output. To + generate a valid BAM file on standard out, please + redirect log with --log=LOGFILE or --log2stderr """ + + +def chunk_bam(bamfile): + """Take in a iterator of pysam.AlignmentSegment entries and yield + lists of reads that all share the same name""" + + last_query_name = None + output_buffer = list() + + for read in bamfile: + if last_query_name is not None and last_query_name != read.query_name: + yield (output_buffer) + output_buffer = list() + + last_query_name = read.query_name + output_buffer.append(read) + + yield (output_buffer) + + +def copy_tags(tags, read1, read2): + """Given a list of tags, copies the values of these tags from read1 + to read2, if the tag is set""" + + for tag in tags: + try: + read1_tag = read1.get_tag(tag, with_value_type=True) + read2.set_tag(tag, value=read1_tag[0], value_type=read1_tag[1]) + except KeyError: + pass + + return read2 + + +def pick_mate(read, template_dict, mate_key): + """Find the mate of read in the template dict using key. It will retrieve + all reads at that key, and then scan to pick the one that refers to _read_ + as it's mate. If there is no such read, it picks a first one it comes to""" + + mate = None + + # get a list of secondary reads at the correct alignment position + potential_mates = template_dict[not read.is_read1][mate_key] + + # search through one at a time to find a read that points to the current read + # as its mate. + for candidate_mate in potential_mates: + if ( + candidate_mate.next_reference_name == read.reference_name + and candidate_mate.next_reference_start == read.pos + ): + mate = candidate_mate + + # if no such read is found, then pick any old secondary alignment at that position + # note: this happens when UMI-tools outputs the wrong read as something's pair. + if mate is None and len(potential_mates) > 0: + mate = potential_mates[0] + + return mate + + +def main(argv=None): + if argv is None: + argv = sys.argv + + # setup command line parser + parser = U.OptionParser(version="%prog version: $Id$", usage=usage, description=globals()["__doc__"]) + group = U.OptionGroup(parser, "RSEM preparation specific options") + + group.add_option( + "--tags", + dest="tags", + type="string", + default="UG,BX", + help="Comma-separated list of tags to transfer from read1 to read2", + ) + group.add_option( + "--sam", dest="sam", action="store_true", default=False, help="input and output SAM rather than BAM" + ) + + parser.add_option_group(group) + + # add common options (-h/--help, ...) and parse command line + (options, args) = U.Start( + parser, argv=argv, add_group_dedup_options=False, add_umi_grouping_options=False, add_sam_options=False + ) + + skipped_stats = Counter() + + if options.stdin != sys.stdin: + in_name = options.stdin.name + options.stdin.close() + else: + in_name = "-" + + if options.sam: + mode = "" + else: + mode = "b" + + inbam = pysam.AlignmentFile(in_name, "r" + mode) + + if options.stdout != sys.stdout: + out_name = options.stdout.name + options.stdout.close() + else: + out_name = "-" + + outbam = pysam.AlignmentFile(out_name, "w" + mode, template=inbam) + + options.tags = options.tags.split(",") + + for template in chunk_bam(inbam): + assert len(set(r.query_name for r in template)) == 1 + current_template = {True: defaultdict(list), False: defaultdict(list)} + + for read in template: + key = (read.reference_name, read.pos, not read.is_secondary) + current_template[read.is_read1][key].append(read) + + output = set() + + for read in template: + mate = None + + # if this read is a non_primary alignment, we first want to check if it has a mate + # with the non-primary alignment flag set. + + mate_key_primary = True + mate_key_secondary = (read.next_reference_name, read.next_reference_start, False) + + # First look for a read that has the same primary/secondary status + # as read (i.e. secondary mate for secondary read, and primary mate + # for primary read) + mate_key = (read.next_reference_name, read.next_reference_start, read.is_secondary) + mate = pick_mate(read, current_template, mate_key) + + # If none was found then look for the opposite (primary mate of secondary + # read or seconadary mate of primary read) + if mate is None: + mate_key = (read.next_reference_name, read.next_reference_start, not read.is_secondary) + mate = pick_mate(read, current_template, mate_key) + + # If we still don't have a mate, then their can't be one? + if mate is None: + skipped_stats["no_mate"] += 1 + U.warn( + "Alignment {} has no mate -- skipped".format( + "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)])) + ) + ) + continue + + # because we might want to make changes to the read, but not have those changes reflected + # if we need the read again,we copy the read. This is only way I can find to do this. + read = pysam.AlignedSegment().from_dict(read.to_dict(), read.header) + mate = pysam.AlignedSegment().from_dict(mate.to_dict(), read.header) + + # Make it so that if our read is secondary, the mate is also secondary. We don't make the + # mate primary if the read is primary because we would otherwise end up with mulitple + # primary alignments. + if read.is_secondary: + mate.is_secondary = True + + # In a situation where there is already one mate for each read, then we will come across + # each pair twice - once when we scan read1 and once when we scan read2. Thus we need + # to make sure we don't output something already output. + if read.is_read1: + mate = copy_tags(options.tags, read, mate) + output_key = str(read) + str(mate) + + if output_key not in output: + output.add(output_key) + outbam.write(read) + outbam.write(mate) + skipped_stats["pairs_output"] += 1 + + elif read.is_read2: + read = copy_tags(options.tags, mate, read) + output_key = str(mate) + str(read) + + if output_key not in output: + output.add(output_key) + outbam.write(mate) + outbam.write(read) + skipped_stats["pairs_output"] += 1 + + else: + skipped_stats["skipped_not_read_12"] += 1 + U.warn( + "Alignment {} is neither read1 nor read2 -- skipped".format( + "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)])) + ) + ) + continue + + if not out_name == "-": + outbam.close() + + U.info( + "Total pairs output: {}, Pairs skipped - no mates: {}," + " Pairs skipped - not read1 or 2: {}".format( + skipped_stats["pairs_output"], skipped_stats["no_mate"], skipped_stats["skipped_not_read12"] + ) + ) + U.Stop() + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/.config.vsh.yaml b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/.config.vsh.yaml new file mode 100644 index 0000000..9c48fed --- /dev/null +++ b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/.config.vsh.yaml @@ -0,0 +1,173 @@ +name: "untar" +version: "v0.1.0" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Tarball file to be unpacked." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output" + description: "Directory to write the contents of the .tar file to." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Other arguments" + arguments: + - type: "string" + name: "--exclude" + alternatives: + - "-e" + description: "Prevents any file or member whose name matches the shell wildcard\ + \ (pattern) from being extracted." + info: null + example: + - "docs/figures" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Unpack a .tar file. When the contents of the .tar file is just a single\ + \ directory,\nput the contents of the directory into the output folder instead of\ + \ that directory.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/craftbox" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.1.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/untar/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/untar" + executable: "target/nextflow/untar/main.nf" + viash_version: "0.9.0" + git_commit: "3143dd6e4c2c3107f79639fe8602d92f3141ad82" + git_remote: "https://github.com/viash-hub/craftbox" +package_config: + name: "craftbox" + version: "v0.1.0" + description: "A collection of custom-tailored scripts and applied tools.\n" + info: null + viash_version: "0.9.0" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + keywords: + - "scripts" + - "custom" + - "implementations" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/craftbox" + issue_tracker: "https://github.com/viash-hub/craftbox/issues" diff --git a/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf new file mode 100644 index 0000000..48fe97b --- /dev/null +++ b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf @@ -0,0 +1,3576 @@ +// untar v0.1.0 +// +// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "untar", + "version" : "v0.1.0", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Tarball file to be unpacked.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Directory to write the contents of the .tar file to.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Other arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--exclude", + "alternatives" : [ + "-e" + ], + "description" : "Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted.", + "example" : [ + "docs/figures" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/craftbox" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:stable-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.1.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/untar/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/untar", + "viash_version" : "0.9.0", + "git_commit" : "3143dd6e4c2c3107f79639fe8602d92f3141ad82", + "git_remote" : "https://github.com/viash-hub/craftbox" + }, + "package_config" : { + "name" : "craftbox", + "version" : "v0.1.0", + "description" : "A collection of custom-tailored scripts and applied tools.\n", + "viash_version" : "0.9.0", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + ], + "keywords" : [ + "scripts", + "custom", + "implementations" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/craftbox", + "issue_tracker" : "https://github.com/viash-hub/craftbox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/usr/bin/env bash + +set -eo pipefail + +extra_args=() + +TMPDIR=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "\\$TMPDIR" ]] && rm -r "\\$TMPDIR" +} +trap clean_up EXIT + +# Check if tarball contains 1 top-level directory. If so, extract the contents of the +# directory to the output directory instead of the directory itself. +echo "Directory contents:" +tar -taf "\\${par_input}" > "\\$TMPDIR/tar_contents.txt" +cat "\\$TMPDIR/tar_contents.txt" + +printf "Checking if tarball contains only a single top-level directory: " +if [[ \\$(grep -o -E '^[./]*[^/]+/\\$' "\\$TMPDIR/tar_contents.txt" | uniq | wc -l) -eq 1 ]]; then + echo "It does." + echo "Extracting the contents of the top-level directory to the output directory instead of the directory itself." + # The directory can be both of the format './' (or ././) or just + # Adjust the number of stripped components accordingly by looking for './' at the beginning of the file. + starting_relative=\\$(grep -oP -m 1 '^(./)*' "\\$TMPDIR/tar_contents.txt" | tr -d '\\\\n' | wc -c) + n_strips=\\$(( (\\$starting_relative / 2)+1 )) + extra_args+=("--strip-components=\\$n_strips") +else + echo "It does not." +fi + +if [ "\\$par_exclude" != "" ]; then + echo "Exclusion of files with wildcard '\\$par_exclude' requested." + extra_args+=("--exclude=\\$par_exclude") +fi + +echo "Starting extraction of tarball '\\$par_input' to output directory '\\$par_output'." +mkdir -p "\\$par_output" +echo "executing 'tar --no-same-owner --no-same-permissions --directory=\\$par_output \\${extra_args[@]} -xavf \\$par_input'" +tar --no-same-owner --no-same-permissions --directory="\\$par_output" \\${extra_args[@]} -xavf "\\$par_input" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/craftbox/untar", + "tag" : "v0.1.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow.config b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow.config new file mode 100644 index 0000000..b023cf6 --- /dev/null +++ b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'untar' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.1.0' + description = 'Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow_schema.json b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow_schema.json new file mode 100644 index 0000000..5ab4909 --- /dev/null +++ b/target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/nextflow_schema.json @@ -0,0 +1,119 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "untar", +"description": "Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n", +"type": "object", +"definitions": { + + + + "input arguments" : { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Tarball file to be unpacked", + "help_text": "Type: `file`, required. Tarball file to be unpacked." + + } + + +} +}, + + + "output arguments" : { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Directory to write the contents of the ", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Directory to write the contents of the .tar file to." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "other arguments" : { + "title": "Other arguments", + "type": "object", + "description": "No description", + "properties": { + + + "exclude": { + "type": + "string", + "description": "Type: `string`, example: `docs/figures`. Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted", + "help_text": "Type: `string`, example: `docs/figures`. Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input arguments" + }, + + { + "$ref": "#/definitions/output arguments" + }, + + { + "$ref": "#/definitions/other arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/executable/bedtools_genomecov/.config.vsh.yaml b/target/executable/bedtools_genomecov/.config.vsh.yaml new file mode 100644 index 0000000..285af0a --- /dev/null +++ b/target/executable/bedtools_genomecov/.config.vsh.yaml @@ -0,0 +1,218 @@ +name: "bedtools_genomecov" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity." + info: null + required: false + choices: + - "unstranded" + - "forward" + - "reverse" + - "auto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam" + description: "Genome BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_bedtools_args" + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--bedgraph_forward" + info: null + default: + - "$id.forward.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_reverse" + info: null + default: + - "$id.reverse.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compute BEDGRAPH (-bg) summaries of feature coverage" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "chr19.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/bedtools_genomecov.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential wget && \\\nwget --no-check-certificate\ + \ https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static\ + \ && \\\nmv bedtools.static /usr/local/bin/bedtools && \\\nchmod a+x /usr/local/bin/bedtools\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bedtools_genomecov/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/bedtools_genomecov" + executable: "target/executable/bedtools_genomecov/bedtools_genomecov" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/bedtools_genomecov/bedtools_genomecov b/target/executable/bedtools_genomecov/bedtools_genomecov new file mode 100755 index 0000000..c1bf995 --- /dev/null +++ b/target/executable/bedtools_genomecov/bedtools_genomecov @@ -0,0 +1,1206 @@ +#!/usr/bin/env bash + +# bedtools_genomecov v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="bedtools_genomecov" +VIASH_META_FUNCTIONALITY_NAME="bedtools_genomecov" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ +apt-get install -y build-essential wget && \ +wget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \ +mv bedtools.static /usr/local/bin/bedtools && \ +chmod a+x /usr/local/bin/bedtools + +LABEL org.opencontainers.image.description="Companion container for running component bedtools_genomecov" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_genomecov v0.3.0" + echo "" + echo "Compute BEDGRAPH (-bg) summaries of feature coverage" + echo "" + echo "Input:" + echo " --strandedness" + echo " type: string" + echo " choices: [ unstranded, forward, reverse, auto ]" + echo " Sample strand-specificity." + echo "" + echo " --bam" + echo " type: file, file must exist" + echo " Genome BAM file" + echo "" + echo " --extra_bedtools_args" + echo " type: string" + echo " default:" + echo "" + echo "Output:" + echo " --bedgraph_forward" + echo " type: file, output, file must exist" + echo " default: \$id.forward.bedgraph" + echo "" + echo " --bedgraph_reverse" + echo " type: file, output, file must exist" + echo " default: \$id.reverse.bedgraph" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "bedtools_genomecov v0.3.0" + exit + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam=*) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam=*\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_bedtools_args) + [ -n "$VIASH_PAR_EXTRA_BEDTOOLS_ARGS" ] && ViashError Bad arguments for option \'--extra_bedtools_args\': \'$VIASH_PAR_EXTRA_BEDTOOLS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_BEDTOOLS_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_bedtools_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_bedtools_args=*) + [ -n "$VIASH_PAR_EXTRA_BEDTOOLS_ARGS" ] && ViashError Bad arguments for option \'--extra_bedtools_args=*\': \'$VIASH_PAR_EXTRA_BEDTOOLS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_BEDTOOLS_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bedgraph_forward) + [ -n "$VIASH_PAR_BEDGRAPH_FORWARD" ] && ViashError Bad arguments for option \'--bedgraph_forward\': \'$VIASH_PAR_BEDGRAPH_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_FORWARD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bedgraph_forward. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bedgraph_forward=*) + [ -n "$VIASH_PAR_BEDGRAPH_FORWARD" ] && ViashError Bad arguments for option \'--bedgraph_forward=*\': \'$VIASH_PAR_BEDGRAPH_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_FORWARD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bedgraph_reverse) + [ -n "$VIASH_PAR_BEDGRAPH_REVERSE" ] && ViashError Bad arguments for option \'--bedgraph_reverse\': \'$VIASH_PAR_BEDGRAPH_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_REVERSE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bedgraph_reverse. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bedgraph_reverse=*) + [ -n "$VIASH_PAR_BEDGRAPH_REVERSE" ] && ViashError Bad arguments for option \'--bedgraph_reverse=*\': \'$VIASH_PAR_BEDGRAPH_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_REVERSE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/bedtools_genomecov:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_EXTRA_BEDTOOLS_ARGS+x} ]; then + VIASH_PAR_EXTRA_BEDTOOLS_ARGS="" +fi +if [ -z ${VIASH_PAR_BEDGRAPH_FORWARD+x} ]; then + VIASH_PAR_BEDGRAPH_FORWARD="\$id.forward.bedgraph" +fi +if [ -z ${VIASH_PAR_BEDGRAPH_REVERSE+x} ]; then + VIASH_PAR_BEDGRAPH_REVERSE="\$id.reverse.bedgraph" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -e "$VIASH_PAR_BAM" ]; then + ViashError "Input file '$VIASH_PAR_BAM' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_STRANDEDNESS" ]; then + VIASH_PAR_STRANDEDNESS_CHOICES=("unstranded;forward;reverse;auto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_STRANDEDNESS_CHOICES[*]};" =~ ";$VIASH_PAR_STRANDEDNESS;" ]]; then + ViashError '--strandedness' specified value of \'$VIASH_PAR_STRANDEDNESS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ] && [ ! -d "$(dirname "$VIASH_PAR_BEDGRAPH_FORWARD")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BEDGRAPH_FORWARD")" +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ] && [ ! -d "$(dirname "$VIASH_PAR_BEDGRAPH_REVERSE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BEDGRAPH_REVERSE")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_BAM" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM")" ) + VIASH_PAR_BAM=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM") +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BEDGRAPH_FORWARD")" ) + VIASH_PAR_BEDGRAPH_FORWARD=$(ViashDockerAutodetectMount "$VIASH_PAR_BEDGRAPH_FORWARD") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BEDGRAPH_FORWARD" ) +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BEDGRAPH_REVERSE")" ) + VIASH_PAR_BEDGRAPH_REVERSE=$(ViashDockerAutodetectMount "$VIASH_PAR_BEDGRAPH_REVERSE") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BEDGRAPH_REVERSE" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bedtools_genomecov-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\"'\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_BEDTOOLS_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_BEDTOOLS_ARGS}" | sed "s#'#'\"'\"'#g;s#.*#par_extra_bedtools_args='&'#" ; else echo "# par_extra_bedtools_args="; fi ) +$( if [ ! -z ${VIASH_PAR_BEDGRAPH_FORWARD+x} ]; then echo "${VIASH_PAR_BEDGRAPH_FORWARD}" | sed "s#'#'\"'\"'#g;s#.*#par_bedgraph_forward='&'#" ; else echo "# par_bedgraph_forward="; fi ) +$( if [ ! -z ${VIASH_PAR_BEDGRAPH_REVERSE+x} ]; then echo "${VIASH_PAR_BEDGRAPH_REVERSE}" | sed "s#'#'\"'\"'#g;s#.*#par_bedgraph_reverse='&'#" ; else echo "# par_bedgraph_reverse="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix_forward="forward" +prefix_reverse="reverse" +if [ \$par_strandedness == 'reverse' ]; then + prefix_forward="reverse" + prefix_reverse="forward" +fi + +bedtools genomecov \\ + -ibam \$par_bam \\ + -bg \\ + -strand + \\ + \$par_extra_bedtools_args | bedtools sort > \$prefix_forward.bedGraph + +bedtools genomecov \\ + -ibam \$par_bam \\ + -bg \\ + -strand - \\ + \$par_extra_bedtools_args | bedtools sort > \$prefix_reverse.bedGraph + +mv \$prefix_forward.bedGraph \$par_bedgraph_forward +mv \$prefix_reverse.bedGraph \$par_bedgraph_reverse +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_BAM" ]; then + VIASH_PAR_BAM=$(ViashDockerStripAutomount "$VIASH_PAR_BAM") + fi + if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ]; then + VIASH_PAR_BEDGRAPH_FORWARD=$(ViashDockerStripAutomount "$VIASH_PAR_BEDGRAPH_FORWARD") + fi + if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ]; then + VIASH_PAR_BEDGRAPH_REVERSE=$(ViashDockerStripAutomount "$VIASH_PAR_BEDGRAPH_REVERSE") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ] && [ ! -e "$VIASH_PAR_BEDGRAPH_FORWARD" ]; then + ViashError "Output file '$VIASH_PAR_BEDGRAPH_FORWARD' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ] && [ ! -e "$VIASH_PAR_BEDGRAPH_REVERSE" ]; then + ViashError "Output file '$VIASH_PAR_BEDGRAPH_REVERSE' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/bedtools_genomecov/nextflow_labels.config b/target/executable/bedtools_genomecov/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/bedtools_genomecov/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/cat_additional_fasta/.config.vsh.yaml b/target/executable/cat_additional_fasta/.config.vsh.yaml new file mode 100644 index 0000000..823e83d --- /dev/null +++ b/target/executable/cat_additional_fasta/.config.vsh.yaml @@ -0,0 +1,222 @@ +name: "cat_additional_fasta" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--additional_fasta" + description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\ + \ sequences." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--biotype" + description: "Biotype value to use when appending entries to GTF file when additional\ + \ fasta file is provided." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--fasta_output" + description: "Concatenated FASTA file." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf_output" + description: "Concatenated GTF file." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Concatenate addional fasta file to reference FASTA and GTF files.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genome.fasta" +- type: "file" + path: "genes.gtf.gz" +- type: "file" + path: "gfp.fa.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/cat_additional_fasta.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/cat_additional_fasta/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/cat_additional_fasta" + executable: "target/executable/cat_additional_fasta/cat_additional_fasta" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/cat_additional_fasta/cat_additional_fasta b/target/executable/cat_additional_fasta/cat_additional_fasta new file mode 100755 index 0000000..89b019c --- /dev/null +++ b/target/executable/cat_additional_fasta/cat_additional_fasta @@ -0,0 +1,1268 @@ +#!/usr/bin/env bash + +# cat_additional_fasta v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="cat_additional_fasta" +VIASH_META_FUNCTIONALITY_NAME="cat_additional_fasta" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM python:latest +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component cat_additional_fasta" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cat_additional_fasta v0.3.0" + echo "" + echo "Concatenate addional fasta file to reference FASTA and GTF files." + echo "" + echo "Input:" + echo " --fasta" + echo " type: file, required parameter, file must exist" + echo " Path to FASTA genome file." + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " Path to GTF annotation file." + echo "" + echo " --additional_fasta" + echo " type: file, file must exist" + echo " FASTA file to concatenate to genome FASTA file e.g. containing spike-in" + echo " sequences." + echo "" + echo " --biotype" + echo " type: string" + echo " Biotype value to use when appending entries to GTF file when additional" + echo " fasta file is provided." + echo "" + echo "Output:" + echo " --fasta_output" + echo " type: file, output, file must exist" + echo " Concatenated FASTA file." + echo "" + echo " --gtf_output" + echo " type: file, output, file must exist" + echo " Concatenated GTF file." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "cat_additional_fasta v0.3.0" + exit + ;; + --fasta) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta=*) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta=*\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --additional_fasta) + [ -n "$VIASH_PAR_ADDITIONAL_FASTA" ] && ViashError Bad arguments for option \'--additional_fasta\': \'$VIASH_PAR_ADDITIONAL_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADDITIONAL_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --additional_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --additional_fasta=*) + [ -n "$VIASH_PAR_ADDITIONAL_FASTA" ] && ViashError Bad arguments for option \'--additional_fasta=*\': \'$VIASH_PAR_ADDITIONAL_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADDITIONAL_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --biotype) + [ -n "$VIASH_PAR_BIOTYPE" ] && ViashError Bad arguments for option \'--biotype\': \'$VIASH_PAR_BIOTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --biotype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --biotype=*) + [ -n "$VIASH_PAR_BIOTYPE" ] && ViashError Bad arguments for option \'--biotype=*\': \'$VIASH_PAR_BIOTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fasta_output) + [ -n "$VIASH_PAR_FASTA_OUTPUT" ] && ViashError Bad arguments for option \'--fasta_output\': \'$VIASH_PAR_FASTA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta_output=*) + [ -n "$VIASH_PAR_FASTA_OUTPUT" ] && ViashError Bad arguments for option \'--fasta_output=*\': \'$VIASH_PAR_FASTA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_output) + [ -n "$VIASH_PAR_GTF_OUTPUT" ] && ViashError Bad arguments for option \'--gtf_output\': \'$VIASH_PAR_GTF_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_output=*) + [ -n "$VIASH_PAR_GTF_OUTPUT" ] && ViashError Bad arguments for option \'--gtf_output=*\': \'$VIASH_PAR_GTF_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/cat_additional_fasta:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_FASTA+x} ]; then + ViashError '--fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTA" ] && [ ! -e "$VIASH_PAR_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ADDITIONAL_FASTA" ] && [ ! -e "$VIASH_PAR_ADDITIONAL_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_ADDITIONAL_FASTA' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_FASTA_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTA_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTA_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_GTF_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_GTF_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GTF_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTA")" ) + VIASH_PAR_FASTA=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTA") +fi +if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF")" ) + VIASH_PAR_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF") +fi +if [ ! -z "$VIASH_PAR_ADDITIONAL_FASTA" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_ADDITIONAL_FASTA")" ) + VIASH_PAR_ADDITIONAL_FASTA=$(ViashDockerAutodetectMount "$VIASH_PAR_ADDITIONAL_FASTA") +fi +if [ ! -z "$VIASH_PAR_FASTA_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTA_OUTPUT")" ) + VIASH_PAR_FASTA_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTA_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTA_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_GTF_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF_OUTPUT")" ) + VIASH_PAR_GTF_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_GTF_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cat_additional_fasta-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/usr/bin/env python3 +""" +Read a custom fasta file and create a custom GTF containing each entry +""" +from itertools import groupby +import logging +import os +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'fasta': $( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "r'${VIASH_PAR_FASTA//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'gtf': $( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "r'${VIASH_PAR_GTF//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'additional_fasta': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_FASTA+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_FASTA//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'biotype': $( if [ ! -z ${VIASH_PAR_BIOTYPE+x} ]; then echo "r'${VIASH_PAR_BIOTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'fasta_output': $( if [ ! -z ${VIASH_PAR_FASTA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_FASTA_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'gtf_output': $( if [ ! -z ${VIASH_PAR_GTF_OUTPUT+x} ]; then echo "r'${VIASH_PAR_GTF_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END + +def fasta_iter(fasta_name): + """ + modified from Brent Pedersen + Correct Way To Parse A Fasta File In Python + given a fasta file. yield tuples of header, sequence + + Fasta iterator from https://www.biostars.org/p/710/#120760 + """ + with open(fasta_name) as fh: + # ditch the boolean (x[0]) and just keep the header or sequence since + # we know they alternate. + faiter = (x[1] for x in groupby(fh, lambda line: line[0] == ">")) + for header in faiter: + # drop the ">" + headerStr = header.__next__()[1:].strip() + # join all sequence lines to one. + seq = "".join(s.strip() for s in faiter.__next__()) + yield (headerStr, seq) + +def fasta2gtf(fasta, output, biotype): + fiter = fasta_iter(fasta) + # GTF output lines + lines = [] + attributes = 'exon_id "{name}.1"; exon_number "1";{biotype} gene_id "{name}_gene"; gene_name "{name}_gene"; gene_source "custom"; transcript_id "{name}_gene"; transcript_name "{name}_gene";\\n' + line_template = "{name}\\ttransgene\\texon\\t1\\t{length}\\t.\\t+\\t.\\t" + attributes + for ff in fiter: + name, seq = ff + # Use first ID as separated by spaces as the "sequence name" + # (equivalent to "chromosome" in other cases) + seqname = name.split()[0] + # Remove all spaces + name = seqname.replace(" ", "_") + length = len(seq) + biotype_attr = "" + if biotype: + biotype_attr = f' {biotype} "transgene";' + line = line_template.format(name=name, length=length, biotype=biotype_attr) + lines.append(line) + with open(output, "w") as f: + f.write("".join(lines)) + +add_name = os.path.basename(par['additional_fasta']) +output = os.path.splitext(add_name)[0] + ".gtf" +fasta2gtf(par['additional_fasta'], output, par['biotype']) + +with open(par['fasta'], 'r') as f1: + content1 = f1.read() +with open(par['additional_fasta'], 'r') as f2: + content2 = f2.read() +with open(par['fasta_output'], 'w') as f_out: + f_out.write(content1 + content2) +with open(par['gtf'], 'r') as g1: + g_content1 = g1.read() +with open(output, 'r') as g2: + g_content2 = g2.read() +with open(par['gtf_output'], 'w') as g_out: + g_out.write(g_content1 + g_content2) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_PAR_FASTA=$(ViashDockerStripAutomount "$VIASH_PAR_FASTA") + fi + if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_PAR_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_GTF") + fi + if [ ! -z "$VIASH_PAR_ADDITIONAL_FASTA" ]; then + VIASH_PAR_ADDITIONAL_FASTA=$(ViashDockerStripAutomount "$VIASH_PAR_ADDITIONAL_FASTA") + fi + if [ ! -z "$VIASH_PAR_FASTA_OUTPUT" ]; then + VIASH_PAR_FASTA_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_FASTA_OUTPUT") + fi + if [ ! -z "$VIASH_PAR_GTF_OUTPUT" ]; then + VIASH_PAR_GTF_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_GTF_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTA_OUTPUT" ] && [ ! -e "$VIASH_PAR_FASTA_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_FASTA_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF_OUTPUT" ] && [ ! -e "$VIASH_PAR_GTF_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_GTF_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/cat_additional_fasta/nextflow_labels.config b/target/executable/cat_additional_fasta/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/cat_additional_fasta/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/cat_fastq/.config.vsh.yaml b/target/executable/cat_fastq/.config.vsh.yaml new file mode 100644 index 0000000..bab7838 --- /dev/null +++ b/target/executable/cat_fastq/.config.vsh.yaml @@ -0,0 +1,209 @@ +name: "cat_fastq" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--read_1" + description: "Read 1 fastq files to be concatenated" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--read_2" + description: "Read 2 fastq files to be concatenated" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--fastq_1" + description: "Concatenated read 1 fastq" + info: null + default: + - "$id_r1.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Concatenated read 2 fastq" + info: null + default: + - "$id_r2.fastq" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Concatenate multiple fastq files" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "SRR6357070_1.fastq.gz" +- type: "file" + path: "SRR6357071_1.fastq.gz" +- type: "file" + path: "SRR6357070_2.fastq.gz" +- type: "file" + path: "SRR6357071_2.fastq.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/cat/fastq/main.nf" + - "modules/nf-core/cat/fastq/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/cat_fastq/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/cat_fastq" + executable: "target/executable/cat_fastq/cat_fastq" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/cat_fastq/cat_fastq b/target/executable/cat_fastq/cat_fastq new file mode 100755 index 0000000..ab1a09a --- /dev/null +++ b/target/executable/cat_fastq/cat_fastq @@ -0,0 +1,1233 @@ +#!/usr/bin/env bash + +# cat_fastq v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="cat_fastq" +VIASH_META_FUNCTIONALITY_NAME="cat_fastq" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component cat_fastq" +LABEL org.opencontainers.image.created="2025-05-07T12:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cat_fastq v0.3.0" + echo "" + echo "Concatenate multiple fastq files" + echo "" + echo "Input:" + echo " --read_1" + echo " type: file, multiple values allowed, file must exist" + echo " Read 1 fastq files to be concatenated" + echo "" + echo " --read_2" + echo " type: file, multiple values allowed, file must exist" + echo " Read 2 fastq files to be concatenated" + echo "" + echo "Output:" + echo " --fastq_1" + echo " type: file, output, file must exist" + echo " default: \$id_r1.fastq" + echo " Concatenated read 1 fastq" + echo "" + echo " --fastq_2" + echo " type: file, output" + echo " default: \$id_r2.fastq" + echo " Concatenated read 2 fastq" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "cat_fastq v0.3.0" + exit + ;; + --read_1) + if [ -z "$VIASH_PAR_READ_1" ]; then + VIASH_PAR_READ_1="$2" + else + VIASH_PAR_READ_1="$VIASH_PAR_READ_1;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_1=*) + if [ -z "$VIASH_PAR_READ_1" ]; then + VIASH_PAR_READ_1=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READ_1="$VIASH_PAR_READ_1;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --read_2) + if [ -z "$VIASH_PAR_READ_2" ]; then + VIASH_PAR_READ_2="$2" + else + VIASH_PAR_READ_2="$VIASH_PAR_READ_2;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_2=*) + if [ -z "$VIASH_PAR_READ_2" ]; then + VIASH_PAR_READ_2=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READ_2="$VIASH_PAR_READ_2;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --fastq_1) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_1=*) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1=*\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_2) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2=*) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2=*\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/cat_fastq:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then + VIASH_PAR_FASTQ_1="\$id_r1.fastq" +fi +if [ -z ${VIASH_PAR_FASTQ_2+x} ]; then + VIASH_PAR_FASTQ_2="\$id_r2.fastq" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_READ_1" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_READ_1; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_READ_2" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_READ_2; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQ_1")" +fi +if [ ! -z "$VIASH_PAR_FASTQ_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQ_2")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_READ_1" ]; then + VIASH_TEST_READ_1=() + IFS=';' + for var in $VIASH_PAR_READ_1; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_READ_1+=( "$var" ) + done + VIASH_PAR_READ_1=$(IFS=';' ; echo "${VIASH_TEST_READ_1[*]}") +fi +if [ ! -z "$VIASH_PAR_READ_2" ]; then + VIASH_TEST_READ_2=() + IFS=';' + for var in $VIASH_PAR_READ_2; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_READ_2+=( "$var" ) + done + VIASH_PAR_READ_2=$(IFS=';' ; echo "${VIASH_TEST_READ_2[*]}") +fi +if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_1")" ) + VIASH_PAR_FASTQ_1=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_1") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_1" ) +fi +if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_2")" ) + VIASH_PAR_FASTQ_2=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_2") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_2" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cat_fastq-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_READ_1+x} ]; then echo "${VIASH_PAR_READ_1}" | sed "s#'#'\"'\"'#g;s#.*#par_read_1='&'#" ; else echo "# par_read_1="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_2+x} ]; then echo "${VIASH_PAR_READ_2}" | sed "s#'#'\"'\"'#g;s#.*#par_read_2='&'#" ; else echo "# par_read_2="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +IFS=";" read -ra read_1 <<< \$par_read_1 +IFS=";" read -ra read_2 <<< \$par_read_2 + +filename=\$(basename -- "\${read_1[0]}") +if [ \${filename##*.} == "gz" ]; then + command="zcat" +else + command="cat" +fi + +if [ \${#read_1[@]} -gt 0 ]; then + \$command \${read_1[*]} > \$par_fastq_1 +fi +if [ \${#read_2[@]} -gt 0 ]; then + \$command \${read_2[*]} > \$par_fastq_2 +fi +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_READ_1" ]; then + unset VIASH_TEST_READ_1 + IFS=';' + for var in $VIASH_PAR_READ_1; do + unset IFS + if [ -z "$VIASH_TEST_READ_1" ]; then + VIASH_TEST_READ_1="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_READ_1="$VIASH_TEST_READ_1;""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_READ_1="$VIASH_TEST_READ_1" + fi + if [ ! -z "$VIASH_PAR_READ_2" ]; then + unset VIASH_TEST_READ_2 + IFS=';' + for var in $VIASH_PAR_READ_2; do + unset IFS + if [ -z "$VIASH_TEST_READ_2" ]; then + VIASH_TEST_READ_2="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_READ_2="$VIASH_TEST_READ_2;""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_READ_2="$VIASH_TEST_READ_2" + fi + if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then + VIASH_PAR_FASTQ_1=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_1") + fi + if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then + VIASH_PAR_FASTQ_2=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_2") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -e "$VIASH_PAR_FASTQ_1" ]; then + ViashError "Output file '$VIASH_PAR_FASTQ_1' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/cat_fastq/nextflow_labels.config b/target/executable/cat_fastq/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/cat_fastq/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/copy_if_exists/.config.vsh.yaml b/target/executable/copy_if_exists/.config.vsh.yaml new file mode 100644 index 0000000..804b85f --- /dev/null +++ b/target/executable/copy_if_exists/.config.vsh.yaml @@ -0,0 +1,183 @@ +name: "copy_if_exists" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--required_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/required_file.txt" + must_exist: false + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--optional_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/optional_file.txt" + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "copy_if_exists_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "required_file.txt" +- type: "file" + path: "optional_file.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/extra/copy_if_exists/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/copy_if_exists" + executable: "target/executable/copy_if_exists/copy_if_exists" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/copy_if_exists/copy_if_exists b/target/executable/copy_if_exists/copy_if_exists new file mode 100755 index 0000000..72081ee --- /dev/null +++ b/target/executable/copy_if_exists/copy_if_exists @@ -0,0 +1,1136 @@ +#!/usr/bin/env bash + +# copy_if_exists v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="copy_if_exists" +VIASH_META_FUNCTIONALITY_NAME="copy_if_exists" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component copy_if_exists" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "copy_if_exists v0.3.0" + echo "" + echo "Input:" + echo " --required_file" + echo " type: file, required parameter" + echo " example: /tmp/rnaseq_workflow_config/required_file.txt" + echo "" + echo " --optional_file" + echo " type: file" + echo " example: /tmp/rnaseq_workflow_config/optional_file.txt" + echo "" + echo "Ouput:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: copy_if_exists_output" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "copy_if_exists v0.3.0" + exit + ;; + --required_file) + [ -n "$VIASH_PAR_REQUIRED_FILE" ] && ViashError Bad arguments for option \'--required_file\': \'$VIASH_PAR_REQUIRED_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REQUIRED_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --required_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --required_file=*) + [ -n "$VIASH_PAR_REQUIRED_FILE" ] && ViashError Bad arguments for option \'--required_file=*\': \'$VIASH_PAR_REQUIRED_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REQUIRED_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --optional_file) + [ -n "$VIASH_PAR_OPTIONAL_FILE" ] && ViashError Bad arguments for option \'--optional_file\': \'$VIASH_PAR_OPTIONAL_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OPTIONAL_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --optional_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --optional_file=*) + [ -n "$VIASH_PAR_OPTIONAL_FILE" ] && ViashError Bad arguments for option \'--optional_file=*\': \'$VIASH_PAR_OPTIONAL_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OPTIONAL_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/copy_if_exists:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_REQUIRED_FILE+x} ]; then + ViashError '--required_file' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="copy_if_exists_output" +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_REQUIRED_FILE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REQUIRED_FILE")" ) + VIASH_PAR_REQUIRED_FILE=$(ViashDockerAutodetectMount "$VIASH_PAR_REQUIRED_FILE") +fi +if [ ! -z "$VIASH_PAR_OPTIONAL_FILE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OPTIONAL_FILE")" ) + VIASH_PAR_OPTIONAL_FILE=$(ViashDockerAutodetectMount "$VIASH_PAR_OPTIONAL_FILE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-copy_if_exists-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_REQUIRED_FILE+x} ]; then echo "${VIASH_PAR_REQUIRED_FILE}" | sed "s#'#'\"'\"'#g;s#.*#par_required_file='&'#" ; else echo "# par_required_file="; fi ) +$( if [ ! -z ${VIASH_PAR_OPTIONAL_FILE+x} ]; then echo "${VIASH_PAR_OPTIONAL_FILE}" | sed "s#'#'\"'\"'#g;s#.*#par_optional_file='&'#" ; else echo "# par_optional_file="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \$par_output + +# This file is checked by the Nextflow module wrapper +cp \$par_required_file "\$par_output" + +# If the variable is empty, we use the default one (registered as a resource) +if [ -z \$par_optional_file ]; then + echo "No optional_file provided, using the default" + cp \$meta_resources_dir/optional_file.txt "\$par_output" +else + echo "Optional file provided" + if [ -f \$par_optional_file ]; then + cp \$par_optional_file "\$par_output" + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_REQUIRED_FILE" ]; then + VIASH_PAR_REQUIRED_FILE=$(ViashDockerStripAutomount "$VIASH_PAR_REQUIRED_FILE") + fi + if [ ! -z "$VIASH_PAR_OPTIONAL_FILE" ]; then + VIASH_PAR_OPTIONAL_FILE=$(ViashDockerStripAutomount "$VIASH_PAR_OPTIONAL_FILE") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/copy_if_exists/nextflow_labels.config b/target/executable/copy_if_exists/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/copy_if_exists/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/copy_if_exists/optional_file.txt b/target/executable/copy_if_exists/optional_file.txt new file mode 100644 index 0000000..6462a73 --- /dev/null +++ b/target/executable/copy_if_exists/optional_file.txt @@ -0,0 +1 @@ +Optional! diff --git a/target/executable/copy_if_exists/required_file.txt b/target/executable/copy_if_exists/required_file.txt new file mode 100644 index 0000000..b4dbbb5 --- /dev/null +++ b/target/executable/copy_if_exists/required_file.txt @@ -0,0 +1 @@ +Required! diff --git a/target/executable/deseq2_qc/.config.vsh.yaml b/target/executable/deseq2_qc/.config.vsh.yaml new file mode 100644 index 0000000..c7608e5 --- /dev/null +++ b/target/executable/deseq2_qc/.config.vsh.yaml @@ -0,0 +1,287 @@ +name: "deseq2_qc" +version: "v0.3.0" +argument_groups: +- name: "input" + arguments: + - type: "file" + name: "--counts" + description: "Count file matrix where rows are genes and columns are samples." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--vst" + description: "Use vst transformation instead of rlog with .DESeq2" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--count_col" + description: "First column containing sample count data." + info: null + default: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--id_col" + description: "Column containing identifiers to be used." + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sample_suffix" + description: "Suffix to remove after sample name in columns e.g. '.rmDup.bam'\ + \ if 'DRUG_R1.rmDup.bam'." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outprefix" + description: "Output prefix" + info: null + default: + - "deseq2" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--label" + description: "Label to used in MultiQC report" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--outdir" + info: null + default: + - "deseq2" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pca_multiqc" + info: null + default: + - "deseq2.pca.vals_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_dists_multiqc" + info: null + default: + - "deseq2.sample.dists_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "r_script" + path: "script.r" + is_executable: true +- type: "file" + path: "deseq2_pca_header.txt" +- type: "file" + path: "deseq2_clustering_header.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "run deseq2, perform pca, generate heatmaps and scatterplots for samples\ + \ in the counts files\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "counts.tsv" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/deseq2_qc.nf" + last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:latest" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "libcurl4-openssl-dev" + - "r-base" + - "r-base-core" + - "libxml2-dev" + - "procps" + - "libssl-dev" + interactive: false + - type: "r" + cran: + - "optparse" + - "ggplot2" + - "RColorBrewer" + - "pheatmap" + - "stringr" + - "matrixStats" + bioc: + - "DESeq2" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/deseq2_qc/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/deseq2_qc" + executable: "target/executable/deseq2_qc/deseq2_qc" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/deseq2_qc/deseq2_clustering_header.txt b/target/executable/deseq2_qc/deseq2_clustering_header.txt new file mode 100644 index 0000000..04e10ef --- /dev/null +++ b/target/executable/deseq2_qc/deseq2_clustering_header.txt @@ -0,0 +1,12 @@ +#id: 'deseq2_clustering' +#section_name: 'DESeq2 sample similarity' +#description: "is generated from clustering by Euclidean distances between +# DESeq2 +# rlog values for each sample +# in the deseq2_qc.r script." +#plot_type: 'heatmap' +#anchor: 'deseq2_clustering' +#pconfig: +# title: 'DESeq2: Heatmap of the sample-to-sample distances' +# xlab: True +# reverseColors: True diff --git a/target/executable/deseq2_qc/deseq2_pca_header.txt b/target/executable/deseq2_qc/deseq2_pca_header.txt new file mode 100644 index 0000000..2599732 --- /dev/null +++ b/target/executable/deseq2_qc/deseq2_pca_header.txt @@ -0,0 +1,11 @@ +#id: 'deseq2_pca' +#section_name: 'DESeq2 PCA plot' +#description: "PCA plot between samples in the experiment. +# These values are calculated using DESeq2 +# in the deseq2_qc.r script." +#plot_type: 'scatter' +#anchor: 'deseq2_pca' +#pconfig: +# title: 'DESeq2: Principal component plot' +# xlab: PC1 +# ylab: PC2 diff --git a/target/executable/deseq2_qc/deseq2_qc b/target/executable/deseq2_qc/deseq2_qc new file mode 100755 index 0000000..2821662 --- /dev/null +++ b/target/executable/deseq2_qc/deseq2_qc @@ -0,0 +1,1540 @@ +#!/usr/bin/env bash + +# deseq2_qc v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="deseq2_qc" +VIASH_META_FUNCTIONALITY_NAME="deseq2_qc" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM debian:latest +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libcurl4-openssl-dev r-base r-base-core libxml2-dev procps libssl-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("DESeq2", quietly = TRUE)) BiocManager::install("DESeq2")' && \ + Rscript -e 'options(warn = 2); remotes::install_cran(c("optparse", "ggplot2", "RColorBrewer", "pheatmap", "stringr", "matrixStats"), repos = "https://cran.rstudio.com")' + +LABEL org.opencontainers.image.description="Companion container for running component deseq2_qc" +LABEL org.opencontainers.image.created="2025-05-07T12:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "deseq2_qc v0.3.0" + echo "" + echo "run deseq2, perform pca, generate heatmaps and scatterplots for samples in the" + echo "counts files" + echo "" + echo "input:" + echo " --counts" + echo " type: file, required parameter, file must exist" + echo " Count file matrix where rows are genes and columns are samples." + echo "" + echo " --vst" + echo " type: boolean" + echo " default: false" + echo " Use vst transformation instead of rlog with .DESeq2" + echo "" + echo " --count_col" + echo " type: integer" + echo " default: 3" + echo " First column containing sample count data." + echo "" + echo " --id_col" + echo " type: integer" + echo " default: 1" + echo " Column containing identifiers to be used." + echo "" + echo " --sample_suffix" + echo " type: string" + echo " default:" + echo " Suffix to remove after sample name in columns e.g. '.rmDup.bam' if" + echo " 'DRUG_R1.rmDup.bam'." + echo "" + echo " --outprefix" + echo " type: string" + echo " default: deseq2" + echo " Output prefix" + echo "" + echo " --label" + echo " type: string" + echo " Label to used in MultiQC report" + echo "" + echo "Output:" + echo " --outdir" + echo " type: file, output, file must exist" + echo " default: deseq2" + echo "" + echo " --pca_multiqc" + echo " type: file, output, file must exist" + echo " default: deseq2.pca.vals_mqc.tsv" + echo "" + echo " --sample_dists_multiqc" + echo " type: file, output, file must exist" + echo " default: deseq2.sample.dists_mqc.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "deseq2_qc v0.3.0" + exit + ;; + --counts) + [ -n "$VIASH_PAR_COUNTS" ] && ViashError Bad arguments for option \'--counts\': \'$VIASH_PAR_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts=*) + [ -n "$VIASH_PAR_COUNTS" ] && ViashError Bad arguments for option \'--counts=*\': \'$VIASH_PAR_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --vst) + [ -n "$VIASH_PAR_VST" ] && ViashError Bad arguments for option \'--vst\': \'$VIASH_PAR_VST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --vst. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --vst=*) + [ -n "$VIASH_PAR_VST" ] && ViashError Bad arguments for option \'--vst=*\': \'$VIASH_PAR_VST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --count_col) + [ -n "$VIASH_PAR_COUNT_COL" ] && ViashError Bad arguments for option \'--count_col\': \'$VIASH_PAR_COUNT_COL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNT_COL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --count_col. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --count_col=*) + [ -n "$VIASH_PAR_COUNT_COL" ] && ViashError Bad arguments for option \'--count_col=*\': \'$VIASH_PAR_COUNT_COL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNT_COL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --id_col) + [ -n "$VIASH_PAR_ID_COL" ] && ViashError Bad arguments for option \'--id_col\': \'$VIASH_PAR_ID_COL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID_COL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id_col. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id_col=*) + [ -n "$VIASH_PAR_ID_COL" ] && ViashError Bad arguments for option \'--id_col=*\': \'$VIASH_PAR_ID_COL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID_COL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sample_suffix) + [ -n "$VIASH_PAR_SAMPLE_SUFFIX" ] && ViashError Bad arguments for option \'--sample_suffix\': \'$VIASH_PAR_SAMPLE_SUFFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SUFFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_suffix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_suffix=*) + [ -n "$VIASH_PAR_SAMPLE_SUFFIX" ] && ViashError Bad arguments for option \'--sample_suffix=*\': \'$VIASH_PAR_SAMPLE_SUFFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SUFFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outprefix) + [ -n "$VIASH_PAR_OUTPREFIX" ] && ViashError Bad arguments for option \'--outprefix\': \'$VIASH_PAR_OUTPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outprefix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outprefix=*) + [ -n "$VIASH_PAR_OUTPREFIX" ] && ViashError Bad arguments for option \'--outprefix=*\': \'$VIASH_PAR_OUTPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --label) + [ -n "$VIASH_PAR_LABEL" ] && ViashError Bad arguments for option \'--label\': \'$VIASH_PAR_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LABEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --label. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --label=*) + [ -n "$VIASH_PAR_LABEL" ] && ViashError Bad arguments for option \'--label=*\': \'$VIASH_PAR_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LABEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outdir) + [ -n "$VIASH_PAR_OUTDIR" ] && ViashError Bad arguments for option \'--outdir\': \'$VIASH_PAR_OUTDIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTDIR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outdir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outdir=*) + [ -n "$VIASH_PAR_OUTDIR" ] && ViashError Bad arguments for option \'--outdir=*\': \'$VIASH_PAR_OUTDIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTDIR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pca_multiqc) + [ -n "$VIASH_PAR_PCA_MULTIQC" ] && ViashError Bad arguments for option \'--pca_multiqc\': \'$VIASH_PAR_PCA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PCA_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pca_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pca_multiqc=*) + [ -n "$VIASH_PAR_PCA_MULTIQC" ] && ViashError Bad arguments for option \'--pca_multiqc=*\': \'$VIASH_PAR_PCA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PCA_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sample_dists_multiqc) + [ -n "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ] && ViashError Bad arguments for option \'--sample_dists_multiqc\': \'$VIASH_PAR_SAMPLE_DISTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_DISTS_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_dists_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_dists_multiqc=*) + [ -n "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ] && ViashError Bad arguments for option \'--sample_dists_multiqc=*\': \'$VIASH_PAR_SAMPLE_DISTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_DISTS_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/deseq2_qc:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_COUNTS+x} ]; then + ViashError '--counts' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_VST+x} ]; then + VIASH_PAR_VST="false" +fi +if [ -z ${VIASH_PAR_COUNT_COL+x} ]; then + VIASH_PAR_COUNT_COL="3" +fi +if [ -z ${VIASH_PAR_ID_COL+x} ]; then + VIASH_PAR_ID_COL="1" +fi +if [ -z ${VIASH_PAR_SAMPLE_SUFFIX+x} ]; then + VIASH_PAR_SAMPLE_SUFFIX="" +fi +if [ -z ${VIASH_PAR_OUTPREFIX+x} ]; then + VIASH_PAR_OUTPREFIX="deseq2" +fi +if [ -z ${VIASH_PAR_OUTDIR+x} ]; then + VIASH_PAR_OUTDIR="deseq2" +fi +if [ -z ${VIASH_PAR_PCA_MULTIQC+x} ]; then + VIASH_PAR_PCA_MULTIQC="deseq2.pca.vals_mqc.tsv" +fi +if [ -z ${VIASH_PAR_SAMPLE_DISTS_MULTIQC+x} ]; then + VIASH_PAR_SAMPLE_DISTS_MULTIQC="deseq2.sample.dists_mqc.tsv" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_COUNTS" ] && [ ! -e "$VIASH_PAR_COUNTS" ]; then + ViashError "Input file '$VIASH_PAR_COUNTS' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_VST" ]]; then + if ! [[ "$VIASH_PAR_VST" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--vst' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_COUNT_COL" ]]; then + if ! [[ "$VIASH_PAR_COUNT_COL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--count_col' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ID_COL" ]]; then + if ! [[ "$VIASH_PAR_ID_COL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--id_col' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTDIR" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTDIR")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTDIR")" +fi +if [ ! -z "$VIASH_PAR_PCA_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_PCA_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PCA_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_SAMPLE_DISTS_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SAMPLE_DISTS_MULTIQC")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_COUNTS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS")" ) + VIASH_PAR_COUNTS=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS") +fi +if [ ! -z "$VIASH_PAR_OUTDIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTDIR")" ) + VIASH_PAR_OUTDIR=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTDIR") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTDIR" ) +fi +if [ ! -z "$VIASH_PAR_PCA_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_PCA_MULTIQC")" ) + VIASH_PAR_PCA_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_PCA_MULTIQC") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_PCA_MULTIQC" ) +fi +if [ ! -z "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_SAMPLE_DISTS_MULTIQC")" ) + VIASH_PAR_SAMPLE_DISTS_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_SAMPLE_DISTS_MULTIQC") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-deseq2_qc-XXXXXX").R +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "counts" = $( if [ ! -z ${VIASH_PAR_COUNTS+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_COUNTS" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "vst" = $( if [ ! -z ${VIASH_PAR_VST+x} ]; then echo -n "as.logical(toupper('"; echo -n "$VIASH_PAR_VST" | sed "s#['\\]#\\\\&#g"; echo "'))"; else echo NULL; fi ), + "count_col" = $( if [ ! -z ${VIASH_PAR_COUNT_COL+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_COUNT_COL" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "id_col" = $( if [ ! -z ${VIASH_PAR_ID_COL+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_ID_COL" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "sample_suffix" = $( if [ ! -z ${VIASH_PAR_SAMPLE_SUFFIX+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_SAMPLE_SUFFIX" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "outprefix" = $( if [ ! -z ${VIASH_PAR_OUTPREFIX+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPREFIX" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "label" = $( if [ ! -z ${VIASH_PAR_LABEL+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_LABEL" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "outdir" = $( if [ ! -z ${VIASH_PAR_OUTDIR+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTDIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "pca_multiqc" = $( if [ ! -z ${VIASH_PAR_PCA_MULTIQC+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PCA_MULTIQC" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "sample_dists_multiqc" = $( if [ ! -z ${VIASH_PAR_SAMPLE_DISTS_MULTIQC+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "name" = $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kib" = $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mib" = $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gib" = $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tib" = $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pib" = $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) +) +dep <- list( + +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +# REQUIREMENTS + +## PCA, HEATMAP AND SCATTERPLOTS FOR SAMPLES IN COUNTS FILE +## - SAMPLE NAMES HAVE TO END IN e.g. "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS. + +# LOAD LIBRARIES +library(DESeq2) +library(ggplot2) +library(RColorBrewer) +library(pheatmap) +library(stringr) + +if (file.exists(par\$outdir) == FALSE) { + dir.create(par\$outdir, recursive = TRUE) +} + +# READ IN COUNTS FILE +count_table <- read.delim(file = par\$counts, header = TRUE, row.names = NULL) +rownames(count_table) <- count_table[, par\$id_col] +count_table <- count_table[, par\$count_col:ncol(count_table), drop = FALSE] +colnames(count_table) <- gsub(par\$sample_suffix, "", colnames(count_table)) +colnames(count_table) <- gsub(pattern = '\\\\.\$', replacement = '', colnames(count_table)) + +# RUN DESEQ2 +samples_vec <- colnames(count_table) +name_components <- strsplit(samples_vec, "_") +n_components <- length(name_components[[1]]) +decompose <- n_components != 1 && all(sapply(name_components, length) == n_components) +coldata <- data.frame(samples_vec, sample = samples_vec, row.names = 1) +if (decompose) { + groupings <- as.data.frame(lapply(1:n_components, function(i) sapply(name_components, "[[", i))) + n_distinct <- sapply(groupings, function(grp) length(unique(grp))) + groupings <- groupings[n_distinct != 1 & n_distinct != length(samples_vec)] + if (ncol(groupings) != 0) { + names(groupings) <- paste0("Group", 1:ncol(groupings)) + coldata <- cbind(coldata, groupings) + } else { + decompose <- FALSE + } +} + +DDSFile <- paste(par\$outdir, "/", par\$outprefix, ".dds.RData", sep = "") + +counts <- count_table[, samples_vec, drop = FALSE] +dds <- DESeqDataSetFromMatrix(countData = round(counts), colData = coldata, design = ~1) +dds <- estimateSizeFactors(dds) + +# No point if only one sample, or one gene +if (min(dim(count_table)) <= 1) { + save(dds, file = DDSFile) + saveRDS(dds, file = sub("\\\\.dds\\\\.RData\$", ".rds", DDSFile)) + warning("Not enough samples or genes in counts file for PCA.", call. = FALSE) + quit(save = "no", status = 0, runLast = FALSE) +} + +if (!par\$vst) { + vst_name <- "rlog" + rld <- rlog(dds) +} else { + vst_name <- "vst" + rld <- varianceStabilizingTransformation(dds) +} + +assay(dds, vst_name) <- assay(rld) +save(dds, file = DDSFile) +saveRDS(dds, file = sub("\\\\.dds\\\\.RData\$", ".rds", DDSFile)) + +# PLOT QC + +##' PCA pre-processeor +##' +##' Generate all the necessary information to plot PCA from a DESeq2 object +##' in which an assay containing a variance-stabilised matrix of counts is +##' stored. Copied from DESeq2::plotPCA, but with additional ability to +##' say which assay to run the PCA on. +##' +##' @param object The DESeq2DataSet object. +##' @param ntop number of top genes to use for principla components, selected by highest row variance. +##' @param assay the name or index of the assay that stores the variance-stabilised data. +##' @return A data.frame containing the projected data alongside the grouping columns. +##' A 'percentVar' attribute is set which includes the percentage of variation each PC explains, +##' and additionally how much the variation within that PC is explained by the grouping variable. +##' @author Gavin Kelly + +plotPCA_vst <- function(object, ntop = 500, assay = length(assays(object))) { + rv <- rowVars(assay(object, assay), useNames = TRUE) + select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] + pca <- prcomp(t(assay(object, assay)[select, ]), center = TRUE, scale = FALSE) + percentVar <- pca\$sdev^2 / sum(pca\$sdev^2) + df <- cbind(as.data.frame(colData(object)), pca\$x) + # Order points so extreme samples are more likely to get label + ord <- order(abs(rank(df\$PC1) - median(df\$PC1)), abs(rank(df\$PC2) - median(df\$PC2))) + df <- df[ord, ] + attr(df, "percentVar") <- data.frame(PC = seq(along = percentVar), percentVar = 100 * percentVar) + return(df) +} + +PlotFile <- paste(par\$outdir, "/", par\$outprefix, ".plots.pdf", sep = "") + +pdf(file = PlotFile, onefile = TRUE, width = 7, height = 7) + +## PCA +ntop <- c(500, Inf) +for (n_top_var in ntop) { + pca_data <- plotPCA_vst(dds, assay = vst_name, ntop = n_top_var) + percentVar <- round(attr(pca_data, "percentVar")\$percentVar) + plot_subtitle <- ifelse(n_top_var == Inf, "All genes", paste("Top", n_top_var, "genes")) + pl <- ggplot(pca_data, aes(PC1, PC2, label = paste0(" ", sample, " "))) + + geom_point() + + geom_text(check_overlap = TRUE, vjust = 0.5, hjust="inward") + + xlab(paste0("PC1: ", percentVar[1], "% variance")) + + ylab(paste0("PC2: ", percentVar[2], "% variance")) + + labs(title = paste0("First PCs on ", vst_name, "-transformed data"), subtitle = plot_subtitle) + + theme(legend.position = "top", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + panel.border = element_rect(colour = "black", fill = NA, size = 1)) + print(pl) + + if (decompose) { + pc_names <- paste0("PC", attr(pca_data, "percentVar")\$PC) + long_pc <- reshape(pca_data, varying=pc_names, direction="long", sep="", timevar="component", idvar="pcrow") + long_pc <- subset(long_pc, component <= 5) + long_pc_grp <- reshape(long_pc, varying = names(groupings), direction = "long", sep = "", timevar = "grouper") + long_pc_grp <- subset(long_pc_grp, grouper <= 5) + long_pc_grp\$component <- paste("PC", long_pc_grp\$component) + long_pc_grp\$grouper <- paste0(long_pc_grp\$grouper, c("st", "nd", "rd", "th", "th")[long_pc_grp\$grouper], " prefix") + pl <- ggplot(long_pc_grp, aes(x = Group, y = PC)) + + geom_point() + + stat_summary(fun = mean, geom = "line", aes(group = 1)) + + labs(x = NULL, y = NULL, subtitle = plot_subtitle, title = "PCs split by sample-name prefixes") + + facet_grid(component ~ grouper, scales = "free_x") + + scale_x_discrete(guide = guide_axis(n.dodge = 3)) + print(pl) + } +} # at end of loop, we'll be using the user-defined ntop if any, else all genes + +## WRITE PC1 vs PC2 VALUES TO FILE +pca_vals <- pca_data[, c("PC1", "PC2")] +colnames(pca_vals) <- paste0(colnames(pca_vals), ": ", percentVar[1:2], '% variance') +pca_vals <- cbind(sample = rownames(pca_vals), pca_vals) +pca_vals_file <- paste(par\$outdir, "/", par\$outprefix, ".pca_vals.txt", sep = "") +write.table(pca_vals, file = pca_vals_file, + row.names = FALSE, col.names = TRUE, + sep = "\\t", quote = TRUE) + +## SAMPLE CORRELATION HEATMAP +sampleDists <- dist(t(assay(dds, vst_name))) +sampleDistMatrix <- as.matrix(sampleDists) +colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255) +pheatmap( + sampleDistMatrix, + clustering_distance_rows = sampleDists, + clustering_distance_cols = sampleDists, + col = colors, + main = paste("Euclidean distance between", vst_name, "of samples") +) + +## WRITE SAMPLE DISTANCES TO FILE +sample_dist_file <- paste(par\$outdir, "/", par\$outprefix, ".sample.dists.txt", sep = "") +write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix), + file = sample_dist_file, row.names = FALSE, + col.names = TRUE, sep = "\\t", quote = FALSE) +dev.off() + +# SAVE SIZE FACTORS +SizeFactorsDir <- paste0(par\$outdir, "/size_factors/") +if (file.exists(SizeFactorsDir) == FALSE) { + dir.create(SizeFactorsDir, recursive = TRUE) +} + +NormFactorsFile <- paste(SizeFactorsDir, par\$outprefix, ".size_factors.RData", sep = "") + +normFactors <- sizeFactors(dds) +save(normFactors, file = NormFactorsFile) + +for (name in names(sizeFactors(dds))) { + sizeFactorFile <- paste(SizeFactorsDir, name, ".txt", sep = "") + write(as.numeric(sizeFactors(dds)[name]), file = sizeFactorFile) +} + +# R SESSION INFO +RLogFile <- "R_sessionInfo.log" + +sink(RLogFile) +a <- sessionInfo() +print(a) +sink() + +# Prepare files for MultiQC + +readLines(paste0(meta\$resources_dir, "/deseq2_pca_header.txt")) |> + stringr::str_replace(pattern = "#id: 'deseq2_pca'", + replace = paste0("#id: '", par\$label, "_deseq2_pca'")) |> + writeLines(con = "tmp.txt") + +readLines(paste0("tmp.txt")) |> + stringr::str_replace(pattern = "#section_name: 'DESeq2 PCA plot'", + replace = paste0("#section_name: 'DESeq2 PCA plot - '", par\$label)) |> + writeLines(con = "tmp.txt") + +system2("cat", args = paste0("tmp.txt ", pca_vals_file), stdout = par\$pca_multiqc) + +readLines(paste0(meta\$resources_dir, "/deseq2_clustering_header.txt")) |> + stringr::str_replace(pattern = "#id: 'deseq2_clustering'", + replace = paste0("#id: '", par\$label, "_deseq2_clustering'")) |> + writeLines(con = "tmp.txt") + +readLines(paste0("tmp.txt")) |> + stringr::str_replace(pattern = "#section_name: 'DESeq2 sample similarity'", + replace = paste0("#section_name: 'DESeq2 sample similarity - '", par\$label)) |> + writeLines(con = "tmp.txt") + +system2("cat", args = paste0("tmp.txt ", sample_dist_file), stdout = par\$sample_dists_multiqc) +VIASHMAIN +Rscript "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_COUNTS" ]; then + VIASH_PAR_COUNTS=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS") + fi + if [ ! -z "$VIASH_PAR_OUTDIR" ]; then + VIASH_PAR_OUTDIR=$(ViashDockerStripAutomount "$VIASH_PAR_OUTDIR") + fi + if [ ! -z "$VIASH_PAR_PCA_MULTIQC" ]; then + VIASH_PAR_PCA_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_PCA_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ]; then + VIASH_PAR_SAMPLE_DISTS_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_SAMPLE_DISTS_MULTIQC") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTDIR" ] && [ ! -e "$VIASH_PAR_OUTDIR" ]; then + ViashError "Output file '$VIASH_PAR_OUTDIR' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PCA_MULTIQC" ] && [ ! -e "$VIASH_PAR_PCA_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_PCA_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ] && [ ! -e "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_SAMPLE_DISTS_MULTIQC' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/deseq2_qc/nextflow_labels.config b/target/executable/deseq2_qc/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/deseq2_qc/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/dupradar/.config.vsh.yaml b/target/executable/dupradar/.config.vsh.yaml new file mode 100644 index 0000000..9dd165c --- /dev/null +++ b/target/executable/dupradar/.config.vsh.yaml @@ -0,0 +1,307 @@ +name: "dupradar" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "Sample ID" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + description: "path to input alignment file in BAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf_annotation" + description: "path to GTF annotation file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "add flag if input alignment file consists of paired reads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "strandedness of input bam file reads (forward, reverse or unstranded\ + \ (default, applicable to paired reads))" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_dupmatrix" + description: "path to output file (txt) of duplicate tag counts" + info: null + default: + - "$id.dup_matrix.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_dup_intercept_mqc" + description: "path to output file (txt) of multiqc intercept value DupRadar" + info: null + default: + - "$id.dup_intercept_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duprate_exp_boxplot" + description: "path to output file (pdf) of distribution of expression box plot" + info: null + default: + - "$id.duprate_exp_boxplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duprate_exp_densplot" + description: "path to output file (pdf) of 2D density scatter plot of duplicate\ + \ tag counts" + info: null + default: + - "$id.duprate_exp_densityplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duprate_exp_denscurve_mqc" + description: "path to output file (pdf) of density curve of gene duplication multiqc" + info: null + default: + - "$id.duprate_exp_density_curve_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_expression_histogram" + description: "path to output file (pdf) of distribution of RPK values per gene\ + \ histogram" + info: null + default: + - "$id.expression_hist.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_intercept_slope" + description: "output file (txt) with progression of duplication rate value" + info: null + default: + - "$id.intercept_slope.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "dupradar.r" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Assessment of duplication rates in RNA-Seq datasets\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" +- type: "file" + path: "genes.gtf" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/dupradar.nf" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "r-base" + interactive: false + - type: "r" + bioc: + - "dupRadar" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/dupradar/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/dupradar" + executable: "target/executable/dupradar/dupradar" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/dupradar/dupradar b/target/executable/dupradar/dupradar new file mode 100755 index 0000000..1b4322f --- /dev/null +++ b/target/executable/dupradar/dupradar @@ -0,0 +1,1444 @@ +#!/usr/bin/env bash + +# dupradar v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="dupradar" +VIASH_META_FUNCTIONALITY_NAME="dupradar" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y r-base && \ + rm -rf /var/lib/apt/lists/* + +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("dupRadar", quietly = TRUE)) BiocManager::install("dupRadar")' + +LABEL org.opencontainers.image.description="Companion container for running component dupradar" +LABEL org.opencontainers.image.created="2025-05-07T12:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "dupradar v0.3.0" + echo "" + echo "Assessment of duplication rates in RNA-Seq datasets" + echo "" + echo "Input:" + echo " --id" + echo " type: string" + echo " Sample ID" + echo "" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " path to input alignment file in BAM format" + echo "" + echo " --gtf_annotation" + echo " type: file, required parameter, file must exist" + echo " path to GTF annotation file." + echo "" + echo " --paired" + echo " type: boolean" + echo " add flag if input alignment file consists of paired reads" + echo "" + echo " --strandedness" + echo " type: string" + echo " choices: [ forward, reverse, unstranded ]" + echo " strandedness of input bam file reads (forward, reverse or unstranded" + echo " (default, applicable to paired reads))" + echo "" + echo "Output:" + echo " --output_dupmatrix" + echo " type: file, output, file must exist" + echo " default: \$id.dup_matrix.txt" + echo " path to output file (txt) of duplicate tag counts" + echo "" + echo " --output_dup_intercept_mqc" + echo " type: file, output, file must exist" + echo " default: \$id.dup_intercept_mqc.txt" + echo " path to output file (txt) of multiqc intercept value DupRadar" + echo "" + echo " --output_duprate_exp_boxplot" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_boxplot.pdf" + echo " path to output file (pdf) of distribution of expression box plot" + echo "" + echo " --output_duprate_exp_densplot" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_densityplot.pdf" + echo " path to output file (pdf) of 2D density scatter plot of duplicate tag" + echo " counts" + echo "" + echo " --output_duprate_exp_denscurve_mqc" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_density_curve_mqc.txt" + echo " path to output file (pdf) of density curve of gene duplication multiqc" + echo "" + echo " --output_expression_histogram" + echo " type: file, output, file must exist" + echo " default: \$id.expression_hist.pdf" + echo " path to output file (pdf) of distribution of RPK values per gene" + echo " histogram" + echo "" + echo " --output_intercept_slope" + echo " type: file, output, file must exist" + echo " default: \$id.intercept_slope.txt" + echo " output file (txt) with progression of duplication rate value" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "dupradar v0.3.0" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_annotation) + [ -n "$VIASH_PAR_GTF_ANNOTATION" ] && ViashError Bad arguments for option \'--gtf_annotation\': \'$VIASH_PAR_GTF_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_ANNOTATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_annotation. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_annotation=*) + [ -n "$VIASH_PAR_GTF_ANNOTATION" ] && ViashError Bad arguments for option \'--gtf_annotation=*\': \'$VIASH_PAR_GTF_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_ANNOTATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --paired) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --paired=*) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_dupmatrix) + [ -n "$VIASH_PAR_OUTPUT_DUPMATRIX" ] && ViashError Bad arguments for option \'--output_dupmatrix\': \'$VIASH_PAR_OUTPUT_DUPMATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPMATRIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_dupmatrix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_dupmatrix=*) + [ -n "$VIASH_PAR_OUTPUT_DUPMATRIX" ] && ViashError Bad arguments for option \'--output_dupmatrix=*\': \'$VIASH_PAR_OUTPUT_DUPMATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPMATRIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_dup_intercept_mqc) + [ -n "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ] && ViashError Bad arguments for option \'--output_dup_intercept_mqc\': \'$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_dup_intercept_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_dup_intercept_mqc=*) + [ -n "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ] && ViashError Bad arguments for option \'--output_dup_intercept_mqc=*\': \'$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_duprate_exp_boxplot) + [ -n "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && ViashError Bad arguments for option \'--output_duprate_exp_boxplot\': \'$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_duprate_exp_boxplot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_duprate_exp_boxplot=*) + [ -n "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && ViashError Bad arguments for option \'--output_duprate_exp_boxplot=*\': \'$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_duprate_exp_densplot) + [ -n "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && ViashError Bad arguments for option \'--output_duprate_exp_densplot\': \'$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_duprate_exp_densplot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_duprate_exp_densplot=*) + [ -n "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && ViashError Bad arguments for option \'--output_duprate_exp_densplot=*\': \'$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_duprate_exp_denscurve_mqc) + [ -n "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && ViashError Bad arguments for option \'--output_duprate_exp_denscurve_mqc\': \'$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_duprate_exp_denscurve_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_duprate_exp_denscurve_mqc=*) + [ -n "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && ViashError Bad arguments for option \'--output_duprate_exp_denscurve_mqc=*\': \'$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_expression_histogram) + [ -n "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && ViashError Bad arguments for option \'--output_expression_histogram\': \'$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_expression_histogram. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_expression_histogram=*) + [ -n "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && ViashError Bad arguments for option \'--output_expression_histogram=*\': \'$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_intercept_slope) + [ -n "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ] && ViashError Bad arguments for option \'--output_intercept_slope\': \'$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_INTERCEPT_SLOPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_intercept_slope. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_intercept_slope=*) + [ -n "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ] && ViashError Bad arguments for option \'--output_intercept_slope=*\': \'$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_INTERCEPT_SLOPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/dupradar:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_GTF_ANNOTATION+x} ]; then + ViashError '--gtf_annotation' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT_DUPMATRIX+x} ]; then + VIASH_PAR_OUTPUT_DUPMATRIX="\$id.dup_matrix.txt" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC+x} ]; then + VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC="\$id.dup_intercept_mqc.txt" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT+x} ]; then + VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT="\$id.duprate_exp_boxplot.pdf" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT+x} ]; then + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT="\$id.duprate_exp_densityplot.pdf" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+x} ]; then + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="\$id.duprate_exp_density_curve_mqc.txt" +fi +if [ -z ${VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM+x} ]; then + VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM="\$id.expression_hist.pdf" +fi +if [ -z ${VIASH_PAR_OUTPUT_INTERCEPT_SLOPE+x} ]; then + VIASH_PAR_OUTPUT_INTERCEPT_SLOPE="\$id.intercept_slope.txt" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF_ANNOTATION" ] && [ ! -e "$VIASH_PAR_GTF_ANNOTATION" ]; then + ViashError "Input file '$VIASH_PAR_GTF_ANNOTATION' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_PAIRED" ]]; then + if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_STRANDEDNESS" ]; then + VIASH_PAR_STRANDEDNESS_CHOICES=("forward;reverse;unstranded") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_STRANDEDNESS_CHOICES[*]};" =~ ";$VIASH_PAR_STRANDEDNESS;" ]]; then + ViashError '--strandedness' specified value of \'$VIASH_PAR_STRANDEDNESS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_DUPMATRIX" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPMATRIX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPMATRIX")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_GTF_ANNOTATION" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF_ANNOTATION")" ) + VIASH_PAR_GTF_ANNOTATION=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF_ANNOTATION") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPMATRIX" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPMATRIX")" ) + VIASH_PAR_OUTPUT_DUPMATRIX=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPMATRIX") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPMATRIX" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC")" ) + VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT")" ) + VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT")" ) + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC")" ) + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM")" ) + VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE")" ) + VIASH_PAR_OUTPUT_INTERCEPT_SLOPE=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-dupradar-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\"'\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF_ANNOTATION+x} ]; then echo "${VIASH_PAR_GTF_ANNOTATION}" | sed "s#'#'\"'\"'#g;s#.*#par_gtf_annotation='&'#" ; else echo "# par_gtf_annotation="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\"'\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPMATRIX+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPMATRIX}" | sed "s#'#'\"'\"'#g;s#.*#par_output_dupmatrix='&'#" ; else echo "# par_output_dupmatrix="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC+x} ]; then echo "${VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC}" | sed "s#'#'\"'\"'#g;s#.*#par_output_dup_intercept_mqc='&'#" ; else echo "# par_output_dup_intercept_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_duprate_exp_boxplot='&'#" ; else echo "# par_output_duprate_exp_boxplot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_duprate_exp_densplot='&'#" ; else echo "# par_output_duprate_exp_densplot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC}" | sed "s#'#'\"'\"'#g;s#.*#par_output_duprate_exp_denscurve_mqc='&'#" ; else echo "# par_output_duprate_exp_denscurve_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM+x} ]; then echo "${VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM}" | sed "s#'#'\"'\"'#g;s#.*#par_output_expression_histogram='&'#" ; else echo "# par_output_expression_histogram="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_INTERCEPT_SLOPE+x} ]; then echo "${VIASH_PAR_OUTPUT_INTERCEPT_SLOPE}" | sed "s#'#'\"'\"'#g;s#.*#par_output_intercept_slope='&'#" ; else echo "# par_output_intercept_slope="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -exo pipefail + +function num_strandness { + if [ \$par_strandedness == 'unstranded' ]; then echo 0 + elif [ \$par_strandedness == 'forward' ]; then echo 1 + elif [ \$par_strandedness == 'reverse' ]; then echo 2 + else echo "strandedness must be unstranded, forward or reverse." && \\ + exit 1 + fi +} + +Rscript "\$meta_resources_dir/dupradar.r" \\ + \$par_input \\ + \$par_id \\ + \$par_gtf_annotation \\ + \$(num_strandness) \\ + \$par_paired \\ + \${meta_cpus:-1} + +mv "\$par_id"_dupMatrix.txt \$par_output_dupmatrix +mv "\$par_id"_dup_intercept_mqc.txt \$par_output_dup_intercept_mqc +mv "\$par_id"_duprateExpBoxplot.pdf \$par_output_duprate_exp_boxplot +mv "\$par_id"_duprateExpDens.pdf \$par_output_duprate_exp_densplot +mv "\$par_id"_duprateExpDensCurve_mqc.txt \$par_output_duprate_exp_denscurve_mqc +mv "\$par_id"_expressionHist.pdf \$par_output_expression_histogram +mv "\$par_id"_intercept_slope.txt \$par_output_intercept_slope +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_GTF_ANNOTATION" ]; then + VIASH_PAR_GTF_ANNOTATION=$(ViashDockerStripAutomount "$VIASH_PAR_GTF_ANNOTATION") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPMATRIX" ]; then + VIASH_PAR_OUTPUT_DUPMATRIX=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPMATRIX") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ]; then + VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ]; then + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ]; then + VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ]; then + VIASH_PAR_OUTPUT_INTERCEPT_SLOPE=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_DUPMATRIX" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPMATRIX" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPMATRIX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && [ ! -e "$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -e "$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_INTERCEPT_SLOPE' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/dupradar/dupradar.r b/target/executable/dupradar/dupradar.r new file mode 100644 index 0000000..82218c6 --- /dev/null +++ b/target/executable/dupradar/dupradar.r @@ -0,0 +1,154 @@ +#!/usr/bin/env Rscript + +# Command line argument processing +args = commandArgs(trailingOnly=TRUE) +if (length(args) < 5) { + stop("Usage: dupRadar.r ", call.=FALSE) +} + +message("paired_end is", args[5]) +message("the type is is", class(args[5])) + +input_bam <- args[1] +output_prefix <- args[2] +annotation_gtf <- args[3] +stranded <- as.numeric(args[4]) +paired_end <- ifelse(args[5] == "true", TRUE, FALSE) +threads <- as.numeric(args[6]) + +bamRegex <- "(.+)\\.bam$" + +if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '' must be an existing file (not a directory) with '.bam' extension...") +if(!(file.exists(annotation_gtf) && (!file.info(annotation_gtf)$isdir))) stop("Third argument '' must be an existing file (and not a directory)...") +if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...") +if(is.na(threads) || (threads<=0)) stop("Fifth argument must be a strictly positive numeric value...") + +# Debug messages (stderr) +message("Input bam (Arg 1): ", input_bam) +message("Output basename(Arg 2): ", output_prefix) +message("Input gtf (Arg 3): ", annotation_gtf) +message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded+1]) +message("paired_end (Arg 5): ", paired_end) +message("Nb threads (Arg 6): ", threads) +message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not specified")) + + +# Load / install packages +if (length(args) > 5) { .libPaths( c( args[6], .libPaths() ) ) } +if (!require("dupRadar")){ + source("http://bioconductor.org/biocLite.R") + biocLite("dupRadar", suppressUpdates=TRUE) + library("dupRadar") +} +if (!require("parallel")) { + install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/') + library("parallel") +} + +# Duplicate stats +dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads) +write.table(dm, file=paste(output_prefix, "_dupMatrix.txt", sep=""), quote=F, row.name=F, sep="\t") + +# 2D density scatter plot +pdf(paste0(output_prefix, "_duprateExpDens.pdf")) +duprateExpDensPlot(DupMat=dm) +title("Density scatter plot") +mtext(output_prefix, side=3) +dev.off() +fit <- duprateExpFit(DupMat=dm) +cat( + paste("- dupRadar Int (duprate at low read counts):", fit$intercept), + paste("- dupRadar Sl (progression of the duplication rate):", fit$slope), + fill=TRUE, labels=output_prefix, + file=paste0(output_prefix, "_intercept_slope.txt"), append=FALSE +) + +# Create a multiqc file dupInt +sample_name <- gsub("Aligned.sortedByCoord.out.markDups", "", output_prefix) +line="#id: DupInt +#plot_type: 'generalstats' +#pconfig: +# dupRadar_intercept: +# title: 'dupInt' +# namespace: 'DupRadar' +# description: 'Intercept value from DupRadar' +# max: 100 +# min: 0 +# scale: 'RdYlGn-rev' +# format: '{:.2f}%' +Sample dupRadar_intercept" + +write(line,file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) +write(paste(sample_name, fit$intercept),file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) + +# Get numbers from dupRadar GLM +curve_x <- sort(log10(dm$RPK)) +curve_y = 100*predict(fit$glm, data.frame(x=curve_x), type="response") +# Remove all of the infinite values +infs = which(curve_x %in% c(-Inf,Inf)) +curve_x = curve_x[-infs] +curve_y = curve_y[-infs] +# Reduce number of data points +curve_x <- curve_x[seq(1, length(curve_x), 10)] +curve_y <- curve_y[seq(1, length(curve_y), 10)] +# Convert x values back to real counts +curve_x = 10^curve_x +# Write to file +line="#id: dupradar +#section_name: 'DupRadar' +#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html' +#description: \"provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication. +# This plot shows the general linear models - a summary of the gene duplication distributions. \" +#pconfig: +# title: 'DupRadar General Linear Model' +# xLog: True +# xlab: 'expression (reads/kbp)' +# ylab: '% duplicate reads' +# ymax: 100 +# ymin: 0 +# tt_label: '{point.x:.1f} reads/kbp: {point.y:,.2f}% duplicates' +# xPlotLines: +# - color: 'green' +# dashStyle: 'LongDash' +# label: +# style: {color: 'green'} +# text: '0.5 RPKM' +# verticalAlign: 'bottom' +# y: -65 +# value: 0.5 +# width: 1 +# - color: 'red' +# dashStyle: 'LongDash' +# label: +# style: {color: 'red'} +# text: '1 read/bp' +# verticalAlign: 'bottom' +# y: -65 +# value: 1000 +# width: 1" + +write(line,file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),append=TRUE) +write.table( + cbind(curve_x, curve_y), + file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"), + quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE, +) + +# Distribution of expression box plot +pdf(paste0(output_prefix, "_duprateExpBoxplot.pdf")) +duprateExpBoxplot(DupMat=dm) +title("Percent Duplication by Expression") +mtext(output_prefix, side=3) +dev.off() + +# Distribution of RPK values per gene +pdf(paste0(output_prefix, "_expressionHist.pdf")) +expressionHist(DupMat=dm) +title("Distribution of RPK values per gene") +mtext(output_prefix, side=3) +dev.off() + +# Print sessioninfo to standard out +print(output_prefix) +citation("dupRadar") +sessionInfo() diff --git a/target/executable/dupradar/nextflow_labels.config b/target/executable/dupradar/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/dupradar/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/getchromsizes/.config.vsh.yaml b/target/executable/getchromsizes/.config.vsh.yaml new file mode 100644 index 0000000..dd040f7 --- /dev/null +++ b/target/executable/getchromsizes/.config.vsh.yaml @@ -0,0 +1,207 @@ +name: "getchromsizes" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Genome fasta files" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--sizes" + description: "File containing chromosome lengths" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "FASTA index file" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gzi" + description: "Optional gzip index file for compressed inputs" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Generates a FASTA file of chromosome sizes and a fasta index file.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genome.fasta" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/custom/getchromsizes/main.nf" + - "modules/nf-core/custom/getchromsizes/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y autoconf automake make gcc perl zlib1g-dev\ + \ libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev curl\ + \ bzip2 && \\\ncurl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2\ + \ -o samtools-1.18.tar.bz2 && \\\ntar -xjf samtools-1.18.tar.bz2 && \\\nrm samtools-1.18.tar.bz2\ + \ && \\\ncd samtools-1.18 && \\\n./configure && \\\nmake && \\\nmake install\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/getchromsizes/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/getchromsizes" + executable: "target/executable/getchromsizes/getchromsizes" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/getchromsizes/getchromsizes b/target/executable/getchromsizes/getchromsizes new file mode 100755 index 0000000..dce3de0 --- /dev/null +++ b/target/executable/getchromsizes/getchromsizes @@ -0,0 +1,1168 @@ +#!/usr/bin/env bash + +# getchromsizes v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="getchromsizes" +VIASH_META_FUNCTIONALITY_NAME="getchromsizes" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ +apt-get install -y autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev curl bzip2 && \ +curl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 -o samtools-1.18.tar.bz2 && \ +tar -xjf samtools-1.18.tar.bz2 && \ +rm samtools-1.18.tar.bz2 && \ +cd samtools-1.18 && \ +./configure && \ +make && \ +make install + +LABEL org.opencontainers.image.description="Companion container for running component getchromsizes" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "getchromsizes v0.3.0" + echo "" + echo "Generates a FASTA file of chromosome sizes and a fasta index file." + echo "" + echo "Input:" + echo " --fasta" + echo " type: file, file must exist" + echo " Genome fasta files" + echo "" + echo "Output:" + echo " --sizes" + echo " type: file, output, file must exist" + echo " File containing chromosome lengths" + echo "" + echo " --fai" + echo " type: file, output, file must exist" + echo " FASTA index file" + echo "" + echo " --gzi" + echo " type: file, output, file must exist" + echo " Optional gzip index file for compressed inputs" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "getchromsizes v0.3.0" + exit + ;; + --fasta) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta=*) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta=*\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sizes) + [ -n "$VIASH_PAR_SIZES" ] && ViashError Bad arguments for option \'--sizes\': \'$VIASH_PAR_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIZES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sizes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sizes=*) + [ -n "$VIASH_PAR_SIZES" ] && ViashError Bad arguments for option \'--sizes=*\': \'$VIASH_PAR_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIZES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fai) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fai. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fai=*) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai=*\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gzi) + [ -n "$VIASH_PAR_GZI" ] && ViashError Bad arguments for option \'--gzi\': \'$VIASH_PAR_GZI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GZI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gzi. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gzi=*) + [ -n "$VIASH_PAR_GZI" ] && ViashError Bad arguments for option \'--gzi=*\': \'$VIASH_PAR_GZI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GZI=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/getchromsizes:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTA" ] && [ ! -e "$VIASH_PAR_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_FASTA' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_SIZES" ] && [ ! -d "$(dirname "$VIASH_PAR_SIZES")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SIZES")" +fi +if [ ! -z "$VIASH_PAR_FAI" ] && [ ! -d "$(dirname "$VIASH_PAR_FAI")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FAI")" +fi +if [ ! -z "$VIASH_PAR_GZI" ] && [ ! -d "$(dirname "$VIASH_PAR_GZI")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GZI")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTA")" ) + VIASH_PAR_FASTA=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTA") +fi +if [ ! -z "$VIASH_PAR_SIZES" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_SIZES")" ) + VIASH_PAR_SIZES=$(ViashDockerAutodetectMount "$VIASH_PAR_SIZES") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_SIZES" ) +fi +if [ ! -z "$VIASH_PAR_FAI" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FAI")" ) + VIASH_PAR_FAI=$(ViashDockerAutodetectMount "$VIASH_PAR_FAI") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FAI" ) +fi +if [ ! -z "$VIASH_PAR_GZI" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GZI")" ) + VIASH_PAR_GZI=$(ViashDockerAutodetectMount "$VIASH_PAR_GZI") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_GZI" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-getchromsizes-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_SIZES+x} ]; then echo "${VIASH_PAR_SIZES}" | sed "s#'#'\"'\"'#g;s#.*#par_sizes='&'#" ; else echo "# par_sizes="; fi ) +$( if [ ! -z ${VIASH_PAR_FAI+x} ]; then echo "${VIASH_PAR_FAI}" | sed "s#'#'\"'\"'#g;s#.*#par_fai='&'#" ; else echo "# par_fai="; fi ) +$( if [ ! -z ${VIASH_PAR_GZI+x} ]; then echo "${VIASH_PAR_GZI}" | sed "s#'#'\"'\"'#g;s#.*#par_gzi='&'#" ; else echo "# par_gzi="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +filename="\$(basename -- \$par_fasta)" + +samtools faidx \$par_fasta +cut -f 1,2 "\$par_fasta.fai" > \$par_sizes +mv "\$par_fasta.fai" \$par_fai +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_PAR_FASTA=$(ViashDockerStripAutomount "$VIASH_PAR_FASTA") + fi + if [ ! -z "$VIASH_PAR_SIZES" ]; then + VIASH_PAR_SIZES=$(ViashDockerStripAutomount "$VIASH_PAR_SIZES") + fi + if [ ! -z "$VIASH_PAR_FAI" ]; then + VIASH_PAR_FAI=$(ViashDockerStripAutomount "$VIASH_PAR_FAI") + fi + if [ ! -z "$VIASH_PAR_GZI" ]; then + VIASH_PAR_GZI=$(ViashDockerStripAutomount "$VIASH_PAR_GZI") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_SIZES" ] && [ ! -e "$VIASH_PAR_SIZES" ]; then + ViashError "Output file '$VIASH_PAR_SIZES' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FAI" ] && [ ! -e "$VIASH_PAR_FAI" ]; then + ViashError "Output file '$VIASH_PAR_FAI' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GZI" ] && [ ! -e "$VIASH_PAR_GZI" ]; then + ViashError "Output file '$VIASH_PAR_GZI' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/getchromsizes/nextflow_labels.config b/target/executable/getchromsizes/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/getchromsizes/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/gtf2bed/.config.vsh.yaml b/target/executable/gtf2bed/.config.vsh.yaml new file mode 100644 index 0000000..9e8788c --- /dev/null +++ b/target/executable/gtf2bed/.config.vsh.yaml @@ -0,0 +1,185 @@ +name: "gtf2bed" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--gtf" + description: "A reference file in GTF format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: " Output" + arguments: + - type: "file" + name: "--bed_output" + description: "BED file resulting from the conversion of the GTF input file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "gtf2bed.pl" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Create BED annotation file from GTF.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genes.gtf.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/gtf2bed.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "perl" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/gtf2bed/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/gtf2bed" + executable: "target/executable/gtf2bed/gtf2bed" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/gtf2bed/gtf2bed b/target/executable/gtf2bed/gtf2bed new file mode 100755 index 0000000..f735166 --- /dev/null +++ b/target/executable/gtf2bed/gtf2bed @@ -0,0 +1,1104 @@ +#!/usr/bin/env bash + +# gtf2bed v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="gtf2bed" +VIASH_META_FUNCTIONALITY_NAME="gtf2bed" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y perl && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.description="Companion container for running component gtf2bed" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "gtf2bed v0.3.0" + echo "" + echo "Create BED annotation file from GTF." + echo "" + echo "Input:" + echo " --gtf" + echo " type: file, required parameter, file must exist" + echo " A reference file in GTF format." + echo "" + echo " Output:" + echo " --bed_output" + echo " type: file, required parameter, output, file must exist" + echo " BED file resulting from the conversion of the GTF input file." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "gtf2bed v0.3.0" + exit + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bed_output) + [ -n "$VIASH_PAR_BED_OUTPUT" ] && ViashError Bad arguments for option \'--bed_output\': \'$VIASH_PAR_BED_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BED_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bed_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bed_output=*) + [ -n "$VIASH_PAR_BED_OUTPUT" ] && ViashError Bad arguments for option \'--bed_output=*\': \'$VIASH_PAR_BED_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BED_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/gtf2bed:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_GTF+x} ]; then + ViashError '--gtf' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_BED_OUTPUT+x} ]; then + ViashError '--bed_output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_BED_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_BED_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BED_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF")" ) + VIASH_PAR_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF") +fi +if [ ! -z "$VIASH_PAR_BED_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BED_OUTPUT")" ) + VIASH_PAR_BED_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_BED_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BED_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-gtf2bed-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_BED_OUTPUT+x} ]; then echo "${VIASH_PAR_BED_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_bed_output='&'#" ; else echo "# par_bed_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +perl "\$meta_resources_dir/gtf2bed.pl" \$par_gtf > \$par_bed_output +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_PAR_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_GTF") + fi + if [ ! -z "$VIASH_PAR_BED_OUTPUT" ]; then + VIASH_PAR_BED_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_BED_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BED_OUTPUT" ] && [ ! -e "$VIASH_PAR_BED_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_BED_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/gtf2bed/gtf2bed.pl b/target/executable/gtf2bed/gtf2bed.pl new file mode 100755 index 0000000..4da5c04 --- /dev/null +++ b/target/executable/gtf2bed/gtf2bed.pl @@ -0,0 +1,122 @@ +#!/usr/bin/env perl + +# Copyright (c) 2011 Erik Aronesty (erik@q32.com) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT. + +use Getopt::Long; + +my $extended; +GetOptions("x"=>\$extended); + +$in = shift @ARGV; + +my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? "unzip -p $in|" : "$in") || die "Can't open $in: $!\n"; +open IN, $in_cmd; + +while () { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { + # most ver 2's stick gene names in the id field + ($id) = $f[8]=~ /\bID="([^"]+)"/; + # most ver 3's stick unquoted names in the name field + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } + + next unless $id && $f[0]; + + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! $f[3]; + # gff3 puts :\d in exons sometimes + $id =~ s/:\d+$// if $gff == 3; + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } +} + +for $id ( + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? + $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + my ($cds, $cde); + ($cds, $cde) = @{$sc{$id}} if $sc{$id}; + + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + + my $gene_id; + my $extend = ""; + if ($extended) { + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + $extend="\t$gene_id"; + } + # added an extra comma to make it look exactly like ucsc's beds + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; +} + +close IN; diff --git a/target/executable/gtf2bed/nextflow_labels.config b/target/executable/gtf2bed/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/gtf2bed/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/gtf_filter/.config.vsh.yaml b/target/executable/gtf_filter/.config.vsh.yaml new file mode 100644 index 0000000..0839f18 --- /dev/null +++ b/target/executable/gtf_filter/.config.vsh.yaml @@ -0,0 +1,195 @@ +name: "gtf_filter" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Genome fasta file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_transcript_id_check" + description: "Skip checking for transcript IDs in the GTF file." + info: null + direction: "input" +- name: " Output" + arguments: + - type: "file" + name: "--filtered_gtf" + description: "Filtered GTF file containing only sequences in the FASTA file" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Filters a GTF file based on sequence names in a FASTA file.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genome.fasta" +- type: "file" + path: "genes.gtf.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/gtf_filter.nf" + last_sha: "1c6012ecbb087014ea4b8f0f3d39b874850277a8" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/gtf_filter/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/gtf_filter" + executable: "target/executable/gtf_filter/gtf_filter" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/gtf_filter/gtf_filter b/target/executable/gtf_filter/gtf_filter new file mode 100755 index 0000000..2d7dada --- /dev/null +++ b/target/executable/gtf_filter/gtf_filter @@ -0,0 +1,1189 @@ +#!/usr/bin/env bash + +# gtf_filter v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="gtf_filter" +VIASH_META_FUNCTIONALITY_NAME="gtf_filter" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM python:latest +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component gtf_filter" +LABEL org.opencontainers.image.created="2025-05-07T12:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "gtf_filter v0.3.0" + echo "" + echo "Filters a GTF file based on sequence names in a FASTA file." + echo "" + echo "Input:" + echo " --fasta" + echo " type: file, file must exist" + echo " Genome fasta file" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " GTF file" + echo "" + echo " --skip_transcript_id_check" + echo " type: boolean_true" + echo " Skip checking for transcript IDs in the GTF file." + echo "" + echo " Output:" + echo " --filtered_gtf" + echo " type: file, output, file must exist" + echo " Filtered GTF file containing only sequences in the FASTA file" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "gtf_filter v0.3.0" + exit + ;; + --fasta) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta=*) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta=*\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_transcript_id_check) + [ -n "$VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK" ] && ViashError Bad arguments for option \'--skip_transcript_id_check\': \'$VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK=true + shift 1 + ;; + --filtered_gtf) + [ -n "$VIASH_PAR_FILTERED_GTF" ] && ViashError Bad arguments for option \'--filtered_gtf\': \'$VIASH_PAR_FILTERED_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FILTERED_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --filtered_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --filtered_gtf=*) + [ -n "$VIASH_PAR_FILTERED_GTF" ] && ViashError Bad arguments for option \'--filtered_gtf=*\': \'$VIASH_PAR_FILTERED_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FILTERED_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/gtf_filter:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK+x} ]; then + VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTA" ] && [ ! -e "$VIASH_PAR_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK" ]]; then + if ! [[ "$VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_transcript_id_check' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_FILTERED_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_FILTERED_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FILTERED_GTF")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTA")" ) + VIASH_PAR_FASTA=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTA") +fi +if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF")" ) + VIASH_PAR_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF") +fi +if [ ! -z "$VIASH_PAR_FILTERED_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FILTERED_GTF")" ) + VIASH_PAR_FILTERED_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_FILTERED_GTF") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FILTERED_GTF" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-gtf_filter-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'fasta': $( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "r'${VIASH_PAR_FASTA//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'gtf': $( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "r'${VIASH_PAR_GTF//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'skip_transcript_id_check': $( if [ ! -z ${VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK+x} ]; then echo "r'${VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'filtered_gtf': $( if [ ! -z ${VIASH_PAR_FILTERED_GTF+x} ]; then echo "r'${VIASH_PAR_FILTERED_GTF//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END +# Adapted from https://github.com/nf-core/rnaseq/blob/3.14.0/bin/filter_gtf.py + +import os +import sys +import re +import statistics +from typing import Set + +def extract_fasta_seq_names(fasta_name: str) -> Set[str]: + """Extracts the sequence names from a FASTA file.""" + with open(fasta_name) as fasta: + return {line[1:].split(None, 1)[0] for line in fasta if line.startswith(">")} + +def tab_delimited(file: str) -> float: + """Check if file is tab-delimited and return median number of tabs.""" + with open(file, "r") as f: + data = f.read(102400) + return statistics.median(line.count("\\t") for line in data.split("\\n")) + +def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool) -> None: + """Filter GTF file based on FASTA sequence names.""" + if tab_delimited(gtf_in) != 8: + raise ValueError("Invalid GTF file: Expected 9 tab-separated columns.") + seq_names_in_genome = extract_fasta_seq_names(fasta) + print(f"Extracted chromosome sequence names from {fasta}") + print("All sequence IDs from FASTA: " + ", ".join(sorted(seq_names_in_genome))) + seq_names_in_gtf = set() + try: + with open(gtf_in) as gtf, open(filtered_gtf_out, "w") as out: + line_count = 0 + for line in gtf: + seq_name = line.split("\\t")[0] + seq_names_in_gtf.add(seq_name) # Add sequence name to the set + if seq_name in seq_names_in_genome: + if skip_transcript_id_check or re.search(r'transcript_id "([^"]+)"', line): + out.write(line) + line_count += 1 + if line_count == 0: + raise ValueError("All GTF lines removed by filters") + except IOError as e: + print(f"File operation failed: {e}") + return + + print("All sequence IDs from GTF: " + ", ".join(sorted(seq_names_in_gtf))) + print(f"Extracted {line_count} matching sequences from {gtf_in} into {filtered_gtf_out}") + +filter_gtf(par["fasta"], par["gtf"], par["filtered_gtf"], par["skip_transcript_id_check"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_PAR_FASTA=$(ViashDockerStripAutomount "$VIASH_PAR_FASTA") + fi + if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_PAR_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_GTF") + fi + if [ ! -z "$VIASH_PAR_FILTERED_GTF" ]; then + VIASH_PAR_FILTERED_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_FILTERED_GTF") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FILTERED_GTF" ] && [ ! -e "$VIASH_PAR_FILTERED_GTF" ]; then + ViashError "Output file '$VIASH_PAR_FILTERED_GTF' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/gtf_filter/nextflow_labels.config b/target/executable/gtf_filter/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/gtf_filter/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/gunzip/.config.vsh.yaml b/target/executable/gunzip/.config.vsh.yaml new file mode 100644 index 0000000..b7f6f0b --- /dev/null +++ b/target/executable/gunzip/.config.vsh.yaml @@ -0,0 +1,184 @@ +name: "gunzip" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Path of file to be uncompressed" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Decompressed file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compress or uncompress a file or list of files.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genes.gff.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/gunzip/main.nf" + - "modules/nf-core/gunzip/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "gzip" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/gunzip/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/gunzip" + executable: "target/executable/gunzip/gunzip" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/gunzip/gunzip b/target/executable/gunzip/gunzip new file mode 100755 index 0000000..e6de144 --- /dev/null +++ b/target/executable/gunzip/gunzip @@ -0,0 +1,1110 @@ +#!/usr/bin/env bash + +# gunzip v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="gunzip" +VIASH_META_FUNCTIONALITY_NAME="gunzip" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y gzip && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.description="Companion container for running component gunzip" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "gunzip v0.3.0" + echo "" + echo "Compress or uncompress a file or list of files." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Path of file to be uncompressed" + echo "" + echo "Output:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Decompressed file." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "gunzip v0.3.0" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/gunzip:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-gunzip-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +filename="\$(basename -- "\$par_input")" + +if [ \${filename##*.} == "gz" ]; then + gunzip -c \$par_input > \$par_output +else + cat \$par_input > \$par_output +fi +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/gunzip/nextflow_labels.config b/target/executable/gunzip/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/gunzip/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/multiqc_custom_biotype/.config.vsh.yaml b/target/executable/multiqc_custom_biotype/.config.vsh.yaml new file mode 100644 index 0000000..de7ca65 --- /dev/null +++ b/target/executable/multiqc_custom_biotype/.config.vsh.yaml @@ -0,0 +1,205 @@ +name: "multiqc_custom_biotype" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--biocounts" + description: "File with all biocounts" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--id" + description: "Sample name" + info: null + default: + - "$id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--features" + description: "Features to count" + info: null + default: + - "rRNA" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--featurecounts_multiqc" + info: null + default: + - "$id.biotype_counts_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + default: + - "$id.biotype_counts_rrna_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "biotypes_header.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculate features percentage for biotype counts" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:latest" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/multiqc_custom_biotype/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/multiqc_custom_biotype" + executable: "target/executable/multiqc_custom_biotype/multiqc_custom_biotype" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/multiqc_custom_biotype/biotypes_header.txt b/target/executable/multiqc_custom_biotype/biotypes_header.txt new file mode 100644 index 0000000..dff52c0 --- /dev/null +++ b/target/executable/multiqc_custom_biotype/biotypes_header.txt @@ -0,0 +1,11 @@ +# id: 'biotype_counts' +# section_name: 'Biotype Counts' +# description: "shows reads overlapping genomic features of different biotypes, +# counted by featureCounts." +# plot_type: 'bargraph' +# anchor: 'featurecounts_biotype' +# pconfig: +# id: "featurecounts_biotype_plot" +# title: "featureCounts: Biotypes" +# xlab: "# Reads" +# cpswitch_counts_label: "Number of Reads" diff --git a/target/executable/multiqc_custom_biotype/multiqc_custom_biotype b/target/executable/multiqc_custom_biotype/multiqc_custom_biotype new file mode 100755 index 0000000..a025bb6 --- /dev/null +++ b/target/executable/multiqc_custom_biotype/multiqc_custom_biotype @@ -0,0 +1,1264 @@ +#!/usr/bin/env bash + +# multiqc_custom_biotype v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="multiqc_custom_biotype" +VIASH_META_FUNCTIONALITY_NAME="multiqc_custom_biotype" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM python:latest +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component multiqc_custom_biotype" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "multiqc_custom_biotype v0.3.0" + echo "" + echo "Calculate features percentage for biotype counts" + echo "" + echo "Input:" + echo " --biocounts" + echo " type: file, file must exist" + echo " File with all biocounts" + echo "" + echo " --id" + echo " type: string" + echo " default: \$id" + echo " Sample name" + echo "" + echo " --features" + echo " type: string" + echo " default: rRNA" + echo " Features to count" + echo "" + echo "Output:" + echo " --featurecounts_multiqc" + echo " type: file, output, file must exist" + echo " default: \$id.biotype_counts_mqc.tsv" + echo "" + echo " --featurecounts_rrna_multiqc" + echo " type: file, output, file must exist" + echo " default: \$id.biotype_counts_rrna_mqc.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "multiqc_custom_biotype v0.3.0" + exit + ;; + --biocounts) + [ -n "$VIASH_PAR_BIOCOUNTS" ] && ViashError Bad arguments for option \'--biocounts\': \'$VIASH_PAR_BIOCOUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOCOUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --biocounts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --biocounts=*) + [ -n "$VIASH_PAR_BIOCOUNTS" ] && ViashError Bad arguments for option \'--biocounts=*\': \'$VIASH_PAR_BIOCOUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOCOUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --features) + [ -n "$VIASH_PAR_FEATURES" ] && ViashError Bad arguments for option \'--features\': \'$VIASH_PAR_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --features=*) + [ -n "$VIASH_PAR_FEATURES" ] && ViashError Bad arguments for option \'--features=*\': \'$VIASH_PAR_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_multiqc) + [ -n "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_multiqc\': \'$VIASH_PAR_FEATURECOUNTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_multiqc=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_multiqc=*\': \'$VIASH_PAR_FEATURECOUNTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_rrna_multiqc) + [ -n "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_rrna_multiqc\': \'$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_rrna_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_rrna_multiqc=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_rrna_multiqc=*\': \'$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/multiqc_custom_biotype:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_ID+x} ]; then + VIASH_PAR_ID="\$id" +fi +if [ -z ${VIASH_PAR_FEATURES+x} ]; then + VIASH_PAR_FEATURES="rRNA" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_MULTIQC+x} ]; then + VIASH_PAR_FEATURECOUNTS_MULTIQC="\$id.biotype_counts_mqc.tsv" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC+x} ]; then + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="\$id.biotype_counts_rrna_mqc.tsv" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BIOCOUNTS" ] && [ ! -e "$VIASH_PAR_BIOCOUNTS" ]; then + ViashError "Input file '$VIASH_PAR_BIOCOUNTS' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_BIOCOUNTS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BIOCOUNTS")" ) + VIASH_PAR_BIOCOUNTS=$(ViashDockerAutodetectMount "$VIASH_PAR_BIOCOUNTS") +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FEATURECOUNTS_MULTIQC")" ) + VIASH_PAR_FEATURECOUNTS_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_FEATURECOUNTS_MULTIQC") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ) +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC")" ) + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-multiqc_custom_biotype-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'biocounts': $( if [ ! -z ${VIASH_PAR_BIOCOUNTS+x} ]; then echo "r'${VIASH_PAR_BIOCOUNTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'id': $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "r'${VIASH_PAR_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'features': $( if [ ! -z ${VIASH_PAR_FEATURES+x} ]; then echo "r'${VIASH_PAR_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'featurecounts_multiqc': $( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_MULTIQC+x} ]; then echo "r'${VIASH_PAR_FEATURECOUNTS_MULTIQC//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'featurecounts_rrna_multiqc': $( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC+x} ]; then echo "r'${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END +#!/usr/bin/env python3 + +import argparse +import logging +import os + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + +mqc_main = """#id: 'biotype-gs' +#plot_type: 'generalstats' +#pconfig:""" + +mqc_pconf = """# percent_{ft}: +# title: '% {ft}' +# namespace: 'Biotype Counts' +# description: '% reads overlapping {ft} features' +# max: 100 +# min: 0 +# scale: 'RdYlGn-rev' +# format: '{{:.2f}}%'""" + + +def mqc_feature_stat(bfile, features, outfile, sname=None): + # If sample name not given use file name + if not sname: + sname = os.path.splitext(os.path.basename(bfile))[0] + + # Try to parse and read biocount file + fcounts = {} + try: + with open(bfile, "r") as bfl: + for ln in bfl: + if ln.startswith("#"): + continue + ft, cn = ln.strip().split("\\t") + fcounts[ft] = float(cn) + except: + logger.error("Trouble reading the biocount file {}".format(bfile)) + return + + total_count = sum(fcounts.values()) + if total_count == 0: + logger.error("No biocounts found, exiting") + return + + # Calculate percentage for each requested feature + fpercent = {f: (fcounts[f] / total_count) * 100 if f in fcounts else 0 for f in features} + if len(fpercent) == 0: + logger.error("Any of given features '{}' not found in the biocount file".format(", ".join(features), bfile)) + return + + # Prepare the output strings + out_head, out_value, out_mqc = ("Sample", "'{}'".format(sname), mqc_main) + for ft, pt in fpercent.items(): + out_head = "{}\\tpercent_{}".format(out_head, ft) + out_value = "{}\\t{}".format(out_value, pt) + out_mqc = "{}\\n{}".format(out_mqc, mqc_pconf.format(ft=ft)) + + # Write the output to a file + with open(outfile, "w") as ofl: + out_final = "\\n".join([out_mqc, out_head, out_value]).strip() + ofl.write(out_final + "\\n") + + +if __name__ == "__main__": + + # Read the biotypes_header.txt file + biotypes_header_path = os.path.join(meta["resources_dir"], 'biotypes_header.txt') + with open(biotypes_header_path, 'r') as header_file: + biotypes_header = header_file.read() + + # Extract specific columns (1 and 7) and skip the first two lines + filtered_lines = [] + with open(par["biocounts"], 'r') as biocounts_file: + for i, line in enumerate(biocounts_file): + if i >= 2: # Skipping first two lines + columns = line.strip().split('\\t') + filtered_line = f"{columns[0]}\\t{columns[6]}" # Columns 1 and 7 (0-indexed) + filtered_lines.append(filtered_line) + + # Concatenate the header and the processed lines + result = biotypes_header + '\\n'.join(filtered_lines) + '\\n' + + # Write the result to par_featurecounts_multiqc + with open(par["featurecounts_multiqc"], 'w') as output_file: + output_file.write(result) + + mqc_feature_stat(par["featurecounts_multiqc"], par["features"], par["featurecounts_rrna_multiqc"], par["id"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_BIOCOUNTS" ]; then + VIASH_PAR_BIOCOUNTS=$(ViashDockerStripAutomount "$VIASH_PAR_BIOCOUNTS") + fi + if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + VIASH_PAR_FEATURECOUNTS_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_FEATURECOUNTS_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && [ ! -e "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_FEATURECOUNTS_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && [ ! -e "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/multiqc_custom_biotype/nextflow_labels.config b/target/executable/multiqc_custom_biotype/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/multiqc_custom_biotype/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/picard_markduplicates/.config.vsh.yaml b/target/executable/picard_markduplicates/.config.vsh.yaml new file mode 100644 index 0000000..ee40899 --- /dev/null +++ b/target/executable/picard_markduplicates/.config.vsh.yaml @@ -0,0 +1,247 @@ +name: "picard_markduplicates" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--bam" + description: "Input BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fasta" + description: "Reference genome FASTA file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "Reference genome FASTA index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_picard_args" + description: "Additional argument to be passed to Picard MarkDuplicates" + info: null + default: + - "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT\ + \ --TMP_DIR tmp" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_bam" + description: "BAM file with duplicate reads marked/removed" + info: null + default: + - "$id.MarkDuplicates.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai" + description: "An optional BAM index file. If desired, --CREATE_INDEX must be passed\ + \ as a flag" + info: null + default: + - "$id.MarkDuplicates.bam.bai" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--metrics" + description: "Duplicate metrics file generated by picard" + info: null + default: + - "$id.MarkDuplicates.metrics.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Locate and tag duplicate reads in a BAM file\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "genome.fasta" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/picard/markduplicates/main.nf" + - "modules/nf-core/picard/markduplicates/meta.yml" + last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\ + \ && \\\nwget --no-check-certificate https://github.com/broadinstitute/picard/releases/download/3.1.1/picard.jar\ + \ && \\\nmv picard.jar /usr/local/bin \n" + env: + - "PICARD=/usr/local/bin/picard.jar" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/picard_markduplicates/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/picard_markduplicates" + executable: "target/executable/picard_markduplicates/picard_markduplicates" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/picard_markduplicates/nextflow_labels.config b/target/executable/picard_markduplicates/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/picard_markduplicates/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/picard_markduplicates/picard_markduplicates b/target/executable/picard_markduplicates/picard_markduplicates new file mode 100755 index 0000000..b21ce84 --- /dev/null +++ b/target/executable/picard_markduplicates/picard_markduplicates @@ -0,0 +1,1258 @@ +#!/usr/bin/env bash + +# picard_markduplicates v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="picard_markduplicates" +VIASH_META_FUNCTIONALITY_NAME="picard_markduplicates" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +ENV PICARD=/usr/local/bin/picard.jar +RUN apt-get update && \ +apt-get install -y build-essential openjdk-17-jdk wget && \ +wget --no-check-certificate https://github.com/broadinstitute/picard/releases/download/3.1.1/picard.jar && \ +mv picard.jar /usr/local/bin + +LABEL org.opencontainers.image.description="Companion container for running component picard_markduplicates" +LABEL org.opencontainers.image.created="2025-05-07T12:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "picard_markduplicates v0.3.0" + echo "" + echo "Locate and tag duplicate reads in a BAM file" + echo "" + echo "Input:" + echo " --bam" + echo " type: file, file must exist" + echo " Input BAM file" + echo "" + echo " --fasta" + echo " type: file, file must exist" + echo " Reference genome FASTA file" + echo "" + echo " --fai" + echo " type: file, file must exist" + echo " Reference genome FASTA index" + echo "" + echo " --extra_picard_args" + echo " type: string" + echo " default: --ASSUME_SORTED true --REMOVE_DUPLICATES false" + echo "--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" + echo " Additional argument to be passed to Picard MarkDuplicates" + echo "" + echo "Output:" + echo " --output_bam" + echo " type: file, output, file must exist" + echo " default: \$id.MarkDuplicates.bam" + echo " BAM file with duplicate reads marked/removed" + echo "" + echo " --bai" + echo " type: file, output" + echo " default: \$id.MarkDuplicates.bam.bai" + echo " An optional BAM index file. If desired, --CREATE_INDEX must be passed as" + echo " a flag" + echo "" + echo " --metrics" + echo " type: file, output, file must exist" + echo " default: \$id.MarkDuplicates.metrics.txt" + echo " Duplicate metrics file generated by picard" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "picard_markduplicates v0.3.0" + exit + ;; + --bam) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam=*) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam=*\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fasta) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta=*) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta=*\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fai) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fai. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fai=*) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai=*\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_picard_args) + [ -n "$VIASH_PAR_EXTRA_PICARD_ARGS" ] && ViashError Bad arguments for option \'--extra_picard_args\': \'$VIASH_PAR_EXTRA_PICARD_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PICARD_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_picard_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_picard_args=*) + [ -n "$VIASH_PAR_EXTRA_PICARD_ARGS" ] && ViashError Bad arguments for option \'--extra_picard_args=*\': \'$VIASH_PAR_EXTRA_PICARD_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PICARD_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_bam) + [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_bam=*) + [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam=*\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bai) + [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bai. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bai=*) + [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai=*\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --metrics) + [ -n "$VIASH_PAR_METRICS" ] && ViashError Bad arguments for option \'--metrics\': \'$VIASH_PAR_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_METRICS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --metrics. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --metrics=*) + [ -n "$VIASH_PAR_METRICS" ] && ViashError Bad arguments for option \'--metrics=*\': \'$VIASH_PAR_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_METRICS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/picard_markduplicates:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_EXTRA_PICARD_ARGS+x} ]; then + VIASH_PAR_EXTRA_PICARD_ARGS="--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" +fi +if [ -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then + VIASH_PAR_OUTPUT_BAM="\$id.MarkDuplicates.bam" +fi +if [ -z ${VIASH_PAR_BAI+x} ]; then + VIASH_PAR_BAI="\$id.MarkDuplicates.bam.bai" +fi +if [ -z ${VIASH_PAR_METRICS+x} ]; then + VIASH_PAR_METRICS="\$id.MarkDuplicates.metrics.txt" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -e "$VIASH_PAR_BAM" ]; then + ViashError "Input file '$VIASH_PAR_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FASTA" ] && [ ! -e "$VIASH_PAR_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FAI" ] && [ ! -e "$VIASH_PAR_FAI" ]; then + ViashError "Input file '$VIASH_PAR_FAI' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_BAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BAM")" +fi +if [ ! -z "$VIASH_PAR_BAI" ] && [ ! -d "$(dirname "$VIASH_PAR_BAI")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAI")" +fi +if [ ! -z "$VIASH_PAR_METRICS" ] && [ ! -d "$(dirname "$VIASH_PAR_METRICS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_METRICS")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_BAM" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM")" ) + VIASH_PAR_BAM=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM") +fi +if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTA")" ) + VIASH_PAR_FASTA=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTA") +fi +if [ ! -z "$VIASH_PAR_FAI" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FAI")" ) + VIASH_PAR_FAI=$(ViashDockerAutodetectMount "$VIASH_PAR_FAI") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_BAM")" ) + VIASH_PAR_OUTPUT_BAM=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_BAM") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_BAM" ) +fi +if [ ! -z "$VIASH_PAR_BAI" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAI")" ) + VIASH_PAR_BAI=$(ViashDockerAutodetectMount "$VIASH_PAR_BAI") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAI" ) +fi +if [ ! -z "$VIASH_PAR_METRICS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_METRICS")" ) + VIASH_PAR_METRICS=$(ViashDockerAutodetectMount "$VIASH_PAR_METRICS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_METRICS" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-picard_markduplicates-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_FAI+x} ]; then echo "${VIASH_PAR_FAI}" | sed "s#'#'\"'\"'#g;s#.*#par_fai='&'#" ; else echo "# par_fai="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_PICARD_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_PICARD_ARGS}" | sed "s#'#'\"'\"'#g;s#.*#par_extra_picard_args='&'#" ; else echo "# par_extra_picard_args="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "${VIASH_PAR_OUTPUT_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_output_bam='&'#" ; else echo "# par_output_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\"'\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_METRICS+x} ]; then echo "${VIASH_PAR_METRICS}" | sed "s#'#'\"'\"'#g;s#.*#par_metrics='&'#" ; else echo "# par_metrics="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +avail_mem=3072 +if [ ! \$meta_memory_mb ]; then + echo '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' +else + avail_mem=\$(( \$meta_memory_mb*0.8 )) +fi + +java -Xmx\${avail_mem}M -jar \$PICARD MarkDuplicates \\ + \$par_extra_picard_args \\ + --INPUT \$par_bam \\ + --OUTPUT \$par_output_bam \\ + --REFERENCE_SEQUENCE \$par_fasta \\ + --METRICS_FILE \$par_metrics +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_BAM" ]; then + VIASH_PAR_BAM=$(ViashDockerStripAutomount "$VIASH_PAR_BAM") + fi + if [ ! -z "$VIASH_PAR_FASTA" ]; then + VIASH_PAR_FASTA=$(ViashDockerStripAutomount "$VIASH_PAR_FASTA") + fi + if [ ! -z "$VIASH_PAR_FAI" ]; then + VIASH_PAR_FAI=$(ViashDockerStripAutomount "$VIASH_PAR_FAI") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then + VIASH_PAR_OUTPUT_BAM=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_BAM") + fi + if [ ! -z "$VIASH_PAR_BAI" ]; then + VIASH_PAR_BAI=$(ViashDockerStripAutomount "$VIASH_PAR_BAI") + fi + if [ ! -z "$VIASH_PAR_METRICS" ]; then + VIASH_PAR_METRICS=$(ViashDockerStripAutomount "$VIASH_PAR_METRICS") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! -e "$VIASH_PAR_OUTPUT_BAM" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_METRICS" ] && [ ! -e "$VIASH_PAR_METRICS" ]; then + ViashError "Output file '$VIASH_PAR_METRICS' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/prepare_multiqc_input/.config.vsh.yaml b/target/executable/prepare_multiqc_input/.config.vsh.yaml new file mode 100644 index 0000000..48c934c --- /dev/null +++ b/target/executable/prepare_multiqc_input/.config.vsh.yaml @@ -0,0 +1,452 @@ +name: "prepare_multiqc_input" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--fail_trimming_multiqc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--fail_mapping_multiqc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--fail_strand_multiqc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_raw_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--fastqc_trim_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--trim_log_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--sortmerna_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--star_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--salmon_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--samtools_stats" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--samtools_flagstat" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--samtools_idxstats" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--markduplicates_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--pseudo_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--featurecounts_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--aligner_pca_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--aligner_clustering_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_aligner_pca_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_aligner_clustering_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--preseq_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--qualimap_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--dupradar_output_dup_intercept_mqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--dupradar_output_duprate_exp_denscurve_mqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--bamstat_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--inferexperiment_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--innerdistance_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--junctionannotation_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--junctionsaturation_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--readdistribution_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--readduplication_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--tin_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--multiqc_config" + description: "Custom multiqc configuration file\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "multiqc_input" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "multiqc_config.yml" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Prepare directory with all the input files for MultiQC.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/prepare_multiqc_input/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/prepare_multiqc_input" + executable: "target/executable/prepare_multiqc_input/prepare_multiqc_input" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/prepare_multiqc_input/multiqc_config.yml b/target/executable/prepare_multiqc_input/multiqc_config.yml new file mode 100644 index 0000000..0ac2fb1 --- /dev/null +++ b/target/executable/prepare_multiqc_input/multiqc_config.yml @@ -0,0 +1,164 @@ +report_comment: > + This report has been generated by the + analysis pipeline. +report_section_order: + "rnaseq-methods-description": + order: -1000 + software_versions: + order: -1001 + "rnaseq.vsh-summary": + order: -1002 + +export_plots: true +disable_version_detection: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - cutadapt + - fastp + - sortmerna + - star + - rsem + - salmon + - kallisto + - samtools + - picard + - preseq + - rseqc + - qualimap + +# Order of modules +top_modules: + - "fail_trimming" + - "fail_mapping" + - "fail_strand" + - "star_rsem_deseq2_pca" + - "star_rsem_deseq2_clustering" + - "star_salmon_deseq2_pca" + - "star_salmon_deseq2_clustering" + - "salmon_deseq2_pca" + - "salmon_deseq2_clustering" + - "kallisto_deseq2_pca" + - "kallisto_deseq2_clustering" + - "biotype_counts" + - "dupradar" + +module_order: + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." + path_filters: + - "*.read_*.fastqc.zip" + - cutadapt + - fastp + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." + path_filters: + - "*.trimgalore.read_*.fastqc.zip" + +# Don't show % Dups in the General Stats table (we have this from Picard) +table_columns_visible: + fastqc: + percent_duplicates: False + +extra_fn_clean_extn: + # - ".mapping_quality" + # - ".MarkDuplicates_flagstat.output.flagstat" + # - ".MarkDuplicates_idxstats.output.idxstats" + # - ".MarkDuplicates_stats.output.txt" + # - ".genome_sorted_MarkDuplicates.output.bam" + # - ".genome_sorted_MarkDuplicates" + - ".read_1" + - ".read_2" + +# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml +custom_data: + fail_trimming: + section_name: "WARNING: Fail Trimming Check" + description: "List of samples that failed the minimum trimmed reads threshold specified via the '--min_trimmed_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_trimmed_samples_table" + table_title: "Samples failed trimming threshold" + namespace: "Samples failed trimming threshold" + format: "{:.0f}" + fail_mapping: + section_name: "WARNING: Fail Alignment Check" + description: "List of samples that failed the STAR minimum mapped reads threshold specified via the '--min_mapped_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_mapped_samples_table" + table_title: "Samples failed mapping threshold" + namespace: "Samples failed mapping threshold" + format: "{:.2f}" + fail_strand: + section_name: "WARNING: Fail Strand Check" + description: "List of samples that failed the strandedness check between that provided in the samplesheet and calculated by the RSeQC infer_experiment.py tool." + plot_type: "table" + pconfig: + id: "fail_strand_check_table" + table_title: "Samples failed strandedness check" + namespace: "Samples failed strandedness check" + format: "{:.2f}" + +# Customise the module search patterns to speed up execution time +# - Skip module sub-tools that we are not interested in +# - Replace file-content searching with filename pattern searching +# - Don't add anything that is the same as the MultiQC default +# See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + + fastqc/zip: + fn: "*.fastqc.zip" + + cutadapt: + fn: "*.trimming_report*.txt" + + fastp: + fn: "*.fastp_out.json" + + sortmerna: + fn: "*sortmerna*.log" + + star: + fn: "*.star_align_reads.log.txt" + + # hisat2: + # fn: "*.hisat2.summary.log" + # salmon: + # fn: "*meta_info.json" + + preseq: + fn: "*.lc_extrap.txt" + + samtools/stats: + fn: "*_stats.output.txt" + samtools/flagstat: + fn: "*_flagstat.output.flagstat" + samtools/idxstats: + fn: "*_idxstats.output.idxstats" + + rseqc/bam_stat: + fn: "*.mapping_quality.txt" + rseqc/junction_saturation: + fn: "*.junction_saturation_plot.r" + rseqc/junction_annotation: + fn: "*.junction_annotation.log" + rseqc/read_duplication_pos: + fn: "*.duplication_rate_mapping.xls" + rseqc/read_distribution: + fn: "*.read_distribution.txt" + rseqc/infer_experiment: + fn: "*.strandedness.txt" + rseqc/inner_distance: + fn: "*.inner_distance_freq.txt" + rseqc/tin: + fn: "*.tin_summary.txt" + + picard/markdups: + fn: "*.MarkDuplicates.metrics.txt" + +skip_versions_section: true diff --git a/target/executable/prepare_multiqc_input/nextflow_labels.config b/target/executable/prepare_multiqc_input/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/prepare_multiqc_input/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/prepare_multiqc_input/prepare_multiqc_input b/target/executable/prepare_multiqc_input/prepare_multiqc_input new file mode 100755 index 0000000..3265c8d --- /dev/null +++ b/target/executable/prepare_multiqc_input/prepare_multiqc_input @@ -0,0 +1,2755 @@ +#!/usr/bin/env bash + +# prepare_multiqc_input v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="prepare_multiqc_input" +VIASH_META_FUNCTIONALITY_NAME="prepare_multiqc_input" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component prepare_multiqc_input" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "prepare_multiqc_input v0.3.0" + echo "" + echo "Prepare directory with all the input files for MultiQC." + echo "" + echo "Input:" + echo " --fail_trimming_multiqc" + echo " type: string" + echo "" + echo " --fail_mapping_multiqc" + echo " type: string" + echo "" + echo " --fail_strand_multiqc" + echo " type: string" + echo "" + echo " --fastqc_raw_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --fastqc_trim_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --trim_log_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --sortmerna_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --star_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --salmon_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --samtools_stats" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --samtools_flagstat" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --samtools_idxstats" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --markduplicates_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --pseudo_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --featurecounts_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --featurecounts_rrna_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --aligner_pca_multiqc" + echo " type: file, file must exist" + echo "" + echo " --aligner_clustering_multiqc" + echo " type: file, file must exist" + echo "" + echo " --pseudo_aligner_pca_multiqc" + echo " type: file, file must exist" + echo "" + echo " --pseudo_aligner_clustering_multiqc" + echo " type: file, file must exist" + echo "" + echo " --preseq_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --qualimap_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --dupradar_output_dup_intercept_mqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --dupradar_output_duprate_exp_denscurve_mqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --bamstat_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --inferexperiment_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --innerdistance_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --junctionannotation_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --junctionsaturation_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --readdistribution_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --readduplication_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --tin_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --multiqc_config" + echo " type: file, file must exist" + echo " Custom multiqc configuration file" + echo "" + echo "Ouput:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: multiqc_input" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "prepare_multiqc_input v0.3.0" + exit + ;; + --fail_trimming_multiqc) + [ -n "$VIASH_PAR_FAIL_TRIMMING_MULTIQC" ] && ViashError Bad arguments for option \'--fail_trimming_multiqc\': \'$VIASH_PAR_FAIL_TRIMMING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAIL_TRIMMING_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fail_trimming_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fail_trimming_multiqc=*) + [ -n "$VIASH_PAR_FAIL_TRIMMING_MULTIQC" ] && ViashError Bad arguments for option \'--fail_trimming_multiqc=*\': \'$VIASH_PAR_FAIL_TRIMMING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAIL_TRIMMING_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fail_mapping_multiqc) + [ -n "$VIASH_PAR_FAIL_MAPPING_MULTIQC" ] && ViashError Bad arguments for option \'--fail_mapping_multiqc\': \'$VIASH_PAR_FAIL_MAPPING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAIL_MAPPING_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fail_mapping_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fail_mapping_multiqc=*) + [ -n "$VIASH_PAR_FAIL_MAPPING_MULTIQC" ] && ViashError Bad arguments for option \'--fail_mapping_multiqc=*\': \'$VIASH_PAR_FAIL_MAPPING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAIL_MAPPING_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fail_strand_multiqc) + [ -n "$VIASH_PAR_FAIL_STRAND_MULTIQC" ] && ViashError Bad arguments for option \'--fail_strand_multiqc\': \'$VIASH_PAR_FAIL_STRAND_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAIL_STRAND_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fail_strand_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fail_strand_multiqc=*) + [ -n "$VIASH_PAR_FAIL_STRAND_MULTIQC" ] && ViashError Bad arguments for option \'--fail_strand_multiqc=*\': \'$VIASH_PAR_FAIL_STRAND_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAIL_STRAND_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_raw_multiqc) + if [ -z "$VIASH_PAR_FASTQC_RAW_MULTIQC" ]; then + VIASH_PAR_FASTQC_RAW_MULTIQC="$2" + else + VIASH_PAR_FASTQC_RAW_MULTIQC="$VIASH_PAR_FASTQC_RAW_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_raw_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_raw_multiqc=*) + if [ -z "$VIASH_PAR_FASTQC_RAW_MULTIQC" ]; then + VIASH_PAR_FASTQC_RAW_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FASTQC_RAW_MULTIQC="$VIASH_PAR_FASTQC_RAW_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --fastqc_trim_multiqc) + if [ -z "$VIASH_PAR_FASTQC_TRIM_MULTIQC" ]; then + VIASH_PAR_FASTQC_TRIM_MULTIQC="$2" + else + VIASH_PAR_FASTQC_TRIM_MULTIQC="$VIASH_PAR_FASTQC_TRIM_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_trim_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_trim_multiqc=*) + if [ -z "$VIASH_PAR_FASTQC_TRIM_MULTIQC" ]; then + VIASH_PAR_FASTQC_TRIM_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FASTQC_TRIM_MULTIQC="$VIASH_PAR_FASTQC_TRIM_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --trim_log_multiqc) + if [ -z "$VIASH_PAR_TRIM_LOG_MULTIQC" ]; then + VIASH_PAR_TRIM_LOG_MULTIQC="$2" + else + VIASH_PAR_TRIM_LOG_MULTIQC="$VIASH_PAR_TRIM_LOG_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_log_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_log_multiqc=*) + if [ -z "$VIASH_PAR_TRIM_LOG_MULTIQC" ]; then + VIASH_PAR_TRIM_LOG_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_TRIM_LOG_MULTIQC="$VIASH_PAR_TRIM_LOG_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sortmerna_multiqc) + if [ -z "$VIASH_PAR_SORTMERNA_MULTIQC" ]; then + VIASH_PAR_SORTMERNA_MULTIQC="$2" + else + VIASH_PAR_SORTMERNA_MULTIQC="$VIASH_PAR_SORTMERNA_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sortmerna_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sortmerna_multiqc=*) + if [ -z "$VIASH_PAR_SORTMERNA_MULTIQC" ]; then + VIASH_PAR_SORTMERNA_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SORTMERNA_MULTIQC="$VIASH_PAR_SORTMERNA_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --star_multiqc) + if [ -z "$VIASH_PAR_STAR_MULTIQC" ]; then + VIASH_PAR_STAR_MULTIQC="$2" + else + VIASH_PAR_STAR_MULTIQC="$VIASH_PAR_STAR_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_multiqc=*) + if [ -z "$VIASH_PAR_STAR_MULTIQC" ]; then + VIASH_PAR_STAR_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_STAR_MULTIQC="$VIASH_PAR_STAR_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --salmon_multiqc) + if [ -z "$VIASH_PAR_SALMON_MULTIQC" ]; then + VIASH_PAR_SALMON_MULTIQC="$2" + else + VIASH_PAR_SALMON_MULTIQC="$VIASH_PAR_SALMON_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_multiqc=*) + if [ -z "$VIASH_PAR_SALMON_MULTIQC" ]; then + VIASH_PAR_SALMON_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SALMON_MULTIQC="$VIASH_PAR_SALMON_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --samtools_stats) + if [ -z "$VIASH_PAR_SAMTOOLS_STATS" ]; then + VIASH_PAR_SAMTOOLS_STATS="$2" + else + VIASH_PAR_SAMTOOLS_STATS="$VIASH_PAR_SAMTOOLS_STATS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --samtools_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --samtools_stats=*) + if [ -z "$VIASH_PAR_SAMTOOLS_STATS" ]; then + VIASH_PAR_SAMTOOLS_STATS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SAMTOOLS_STATS="$VIASH_PAR_SAMTOOLS_STATS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --samtools_flagstat) + if [ -z "$VIASH_PAR_SAMTOOLS_FLAGSTAT" ]; then + VIASH_PAR_SAMTOOLS_FLAGSTAT="$2" + else + VIASH_PAR_SAMTOOLS_FLAGSTAT="$VIASH_PAR_SAMTOOLS_FLAGSTAT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --samtools_flagstat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --samtools_flagstat=*) + if [ -z "$VIASH_PAR_SAMTOOLS_FLAGSTAT" ]; then + VIASH_PAR_SAMTOOLS_FLAGSTAT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SAMTOOLS_FLAGSTAT="$VIASH_PAR_SAMTOOLS_FLAGSTAT,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --samtools_idxstats) + if [ -z "$VIASH_PAR_SAMTOOLS_IDXSTATS" ]; then + VIASH_PAR_SAMTOOLS_IDXSTATS="$2" + else + VIASH_PAR_SAMTOOLS_IDXSTATS="$VIASH_PAR_SAMTOOLS_IDXSTATS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --samtools_idxstats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --samtools_idxstats=*) + if [ -z "$VIASH_PAR_SAMTOOLS_IDXSTATS" ]; then + VIASH_PAR_SAMTOOLS_IDXSTATS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SAMTOOLS_IDXSTATS="$VIASH_PAR_SAMTOOLS_IDXSTATS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --markduplicates_multiqc) + if [ -z "$VIASH_PAR_MARKDUPLICATES_MULTIQC" ]; then + VIASH_PAR_MARKDUPLICATES_MULTIQC="$2" + else + VIASH_PAR_MARKDUPLICATES_MULTIQC="$VIASH_PAR_MARKDUPLICATES_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --markduplicates_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --markduplicates_multiqc=*) + if [ -z "$VIASH_PAR_MARKDUPLICATES_MULTIQC" ]; then + VIASH_PAR_MARKDUPLICATES_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_MARKDUPLICATES_MULTIQC="$VIASH_PAR_MARKDUPLICATES_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --pseudo_multiqc) + if [ -z "$VIASH_PAR_PSEUDO_MULTIQC" ]; then + VIASH_PAR_PSEUDO_MULTIQC="$2" + else + VIASH_PAR_PSEUDO_MULTIQC="$VIASH_PAR_PSEUDO_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_multiqc=*) + if [ -z "$VIASH_PAR_PSEUDO_MULTIQC" ]; then + VIASH_PAR_PSEUDO_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_PSEUDO_MULTIQC="$VIASH_PAR_PSEUDO_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --featurecounts_multiqc) + if [ -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + VIASH_PAR_FEATURECOUNTS_MULTIQC="$2" + else + VIASH_PAR_FEATURECOUNTS_MULTIQC="$VIASH_PAR_FEATURECOUNTS_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_multiqc=*) + if [ -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + VIASH_PAR_FEATURECOUNTS_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FEATURECOUNTS_MULTIQC="$VIASH_PAR_FEATURECOUNTS_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --featurecounts_rrna_multiqc) + if [ -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="$2" + else + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_rrna_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_rrna_multiqc=*) + if [ -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --aligner_pca_multiqc) + [ -n "$VIASH_PAR_ALIGNER_PCA_MULTIQC" ] && ViashError Bad arguments for option \'--aligner_pca_multiqc\': \'$VIASH_PAR_ALIGNER_PCA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER_PCA_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --aligner_pca_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --aligner_pca_multiqc=*) + [ -n "$VIASH_PAR_ALIGNER_PCA_MULTIQC" ] && ViashError Bad arguments for option \'--aligner_pca_multiqc=*\': \'$VIASH_PAR_ALIGNER_PCA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER_PCA_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --aligner_clustering_multiqc) + [ -n "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC" ] && ViashError Bad arguments for option \'--aligner_clustering_multiqc\': \'$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --aligner_clustering_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --aligner_clustering_multiqc=*) + [ -n "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC" ] && ViashError Bad arguments for option \'--aligner_clustering_multiqc=*\': \'$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner_pca_multiqc) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_aligner_pca_multiqc\': \'$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner_pca_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner_pca_multiqc=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_aligner_pca_multiqc=*\': \'$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner_clustering_multiqc) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_aligner_clustering_multiqc\': \'$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner_clustering_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner_clustering_multiqc=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_aligner_clustering_multiqc=*\': \'$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --preseq_multiqc) + if [ -z "$VIASH_PAR_PRESEQ_MULTIQC" ]; then + VIASH_PAR_PRESEQ_MULTIQC="$2" + else + VIASH_PAR_PRESEQ_MULTIQC="$VIASH_PAR_PRESEQ_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --preseq_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --preseq_multiqc=*) + if [ -z "$VIASH_PAR_PRESEQ_MULTIQC" ]; then + VIASH_PAR_PRESEQ_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_PRESEQ_MULTIQC="$VIASH_PAR_PRESEQ_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --qualimap_multiqc) + if [ -z "$VIASH_PAR_QUALIMAP_MULTIQC" ]; then + VIASH_PAR_QUALIMAP_MULTIQC="$2" + else + VIASH_PAR_QUALIMAP_MULTIQC="$VIASH_PAR_QUALIMAP_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_multiqc=*) + if [ -z "$VIASH_PAR_QUALIMAP_MULTIQC" ]; then + VIASH_PAR_QUALIMAP_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_QUALIMAP_MULTIQC="$VIASH_PAR_QUALIMAP_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --dupradar_output_dup_intercept_mqc) + if [ -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$2" + else + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_dup_intercept_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_dup_intercept_mqc=*) + if [ -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --dupradar_output_duprate_exp_denscurve_mqc) + if [ -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$2" + else + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_duprate_exp_denscurve_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_duprate_exp_denscurve_mqc=*) + if [ -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --bamstat_multiqc) + if [ -z "$VIASH_PAR_BAMSTAT_MULTIQC" ]; then + VIASH_PAR_BAMSTAT_MULTIQC="$2" + else + VIASH_PAR_BAMSTAT_MULTIQC="$VIASH_PAR_BAMSTAT_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamstat_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bamstat_multiqc=*) + if [ -z "$VIASH_PAR_BAMSTAT_MULTIQC" ]; then + VIASH_PAR_BAMSTAT_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_BAMSTAT_MULTIQC="$VIASH_PAR_BAMSTAT_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --inferexperiment_multiqc) + if [ -z "$VIASH_PAR_INFEREXPERIMENT_MULTIQC" ]; then + VIASH_PAR_INFEREXPERIMENT_MULTIQC="$2" + else + VIASH_PAR_INFEREXPERIMENT_MULTIQC="$VIASH_PAR_INFEREXPERIMENT_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inferexperiment_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inferexperiment_multiqc=*) + if [ -z "$VIASH_PAR_INFEREXPERIMENT_MULTIQC" ]; then + VIASH_PAR_INFEREXPERIMENT_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INFEREXPERIMENT_MULTIQC="$VIASH_PAR_INFEREXPERIMENT_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --innerdistance_multiqc) + if [ -z "$VIASH_PAR_INNERDISTANCE_MULTIQC" ]; then + VIASH_PAR_INNERDISTANCE_MULTIQC="$2" + else + VIASH_PAR_INNERDISTANCE_MULTIQC="$VIASH_PAR_INNERDISTANCE_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --innerdistance_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --innerdistance_multiqc=*) + if [ -z "$VIASH_PAR_INNERDISTANCE_MULTIQC" ]; then + VIASH_PAR_INNERDISTANCE_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INNERDISTANCE_MULTIQC="$VIASH_PAR_INNERDISTANCE_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --junctionannotation_multiqc) + if [ -z "$VIASH_PAR_JUNCTIONANNOTATION_MULTIQC" ]; then + VIASH_PAR_JUNCTIONANNOTATION_MULTIQC="$2" + else + VIASH_PAR_JUNCTIONANNOTATION_MULTIQC="$VIASH_PAR_JUNCTIONANNOTATION_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junctionannotation_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junctionannotation_multiqc=*) + if [ -z "$VIASH_PAR_JUNCTIONANNOTATION_MULTIQC" ]; then + VIASH_PAR_JUNCTIONANNOTATION_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_JUNCTIONANNOTATION_MULTIQC="$VIASH_PAR_JUNCTIONANNOTATION_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --junctionsaturation_multiqc) + if [ -z "$VIASH_PAR_JUNCTIONSATURATION_MULTIQC" ]; then + VIASH_PAR_JUNCTIONSATURATION_MULTIQC="$2" + else + VIASH_PAR_JUNCTIONSATURATION_MULTIQC="$VIASH_PAR_JUNCTIONSATURATION_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junctionsaturation_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junctionsaturation_multiqc=*) + if [ -z "$VIASH_PAR_JUNCTIONSATURATION_MULTIQC" ]; then + VIASH_PAR_JUNCTIONSATURATION_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_JUNCTIONSATURATION_MULTIQC="$VIASH_PAR_JUNCTIONSATURATION_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readdistribution_multiqc) + if [ -z "$VIASH_PAR_READDISTRIBUTION_MULTIQC" ]; then + VIASH_PAR_READDISTRIBUTION_MULTIQC="$2" + else + VIASH_PAR_READDISTRIBUTION_MULTIQC="$VIASH_PAR_READDISTRIBUTION_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readdistribution_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readdistribution_multiqc=*) + if [ -z "$VIASH_PAR_READDISTRIBUTION_MULTIQC" ]; then + VIASH_PAR_READDISTRIBUTION_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READDISTRIBUTION_MULTIQC="$VIASH_PAR_READDISTRIBUTION_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readduplication_multiqc) + if [ -z "$VIASH_PAR_READDUPLICATION_MULTIQC" ]; then + VIASH_PAR_READDUPLICATION_MULTIQC="$2" + else + VIASH_PAR_READDUPLICATION_MULTIQC="$VIASH_PAR_READDUPLICATION_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readduplication_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readduplication_multiqc=*) + if [ -z "$VIASH_PAR_READDUPLICATION_MULTIQC" ]; then + VIASH_PAR_READDUPLICATION_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READDUPLICATION_MULTIQC="$VIASH_PAR_READDUPLICATION_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --tin_multiqc) + if [ -z "$VIASH_PAR_TIN_MULTIQC" ]; then + VIASH_PAR_TIN_MULTIQC="$2" + else + VIASH_PAR_TIN_MULTIQC="$VIASH_PAR_TIN_MULTIQC,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tin_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tin_multiqc=*) + if [ -z "$VIASH_PAR_TIN_MULTIQC" ]; then + VIASH_PAR_TIN_MULTIQC=$(ViashRemoveFlags "$1") + else + VIASH_PAR_TIN_MULTIQC="$VIASH_PAR_TIN_MULTIQC,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --multiqc_config) + [ -n "$VIASH_PAR_MULTIQC_CONFIG" ] && ViashError Bad arguments for option \'--multiqc_config\': \'$VIASH_PAR_MULTIQC_CONFIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_CONFIG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_config. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_config=*) + [ -n "$VIASH_PAR_MULTIQC_CONFIG" ] && ViashError Bad arguments for option \'--multiqc_config=*\': \'$VIASH_PAR_MULTIQC_CONFIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_CONFIG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/prepare_multiqc_input:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="multiqc_input" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTQC_RAW_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_FASTQC_RAW_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_FASTQC_TRIM_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_FASTQC_TRIM_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_TRIM_LOG_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_TRIM_LOG_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SORTMERNA_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_SORTMERNA_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_STAR_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_STAR_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SALMON_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_SALMON_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SAMTOOLS_STATS" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_SAMTOOLS_STATS; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SAMTOOLS_FLAGSTAT" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_SAMTOOLS_FLAGSTAT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SAMTOOLS_IDXSTATS" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_SAMTOOLS_IDXSTATS; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_MARKDUPLICATES_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_MARKDUPLICATES_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_PSEUDO_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_PSEUDO_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_FEATURECOUNTS_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_ALIGNER_PCA_MULTIQC" ] && [ ! -e "$VIASH_PAR_ALIGNER_PCA_MULTIQC" ]; then + ViashError "Input file '$VIASH_PAR_ALIGNER_PCA_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC" ] && [ ! -e "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC" ]; then + ViashError "Input file '$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC" ] && [ ! -e "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC" ]; then + ViashError "Input file '$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC" ] && [ ! -e "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC" ]; then + ViashError "Input file '$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PRESEQ_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_PRESEQ_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_QUALIMAP_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_BAMSTAT_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_BAMSTAT_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_INFEREXPERIMENT_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_INFEREXPERIMENT_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_INNERDISTANCE_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_INNERDISTANCE_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_JUNCTIONANNOTATION_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_JUNCTIONANNOTATION_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_JUNCTIONSATURATION_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_JUNCTIONSATURATION_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_READDISTRIBUTION_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_READDISTRIBUTION_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_READDUPLICATION_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_READDUPLICATION_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_TIN_MULTIQC" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_TIN_MULTIQC; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_MULTIQC_CONFIG" ] && [ ! -e "$VIASH_PAR_MULTIQC_CONFIG" ]; then + ViashError "Input file '$VIASH_PAR_MULTIQC_CONFIG' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_FASTQC_RAW_MULTIQC" ]; then + VIASH_TEST_FASTQC_RAW_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_FASTQC_RAW_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_FASTQC_RAW_MULTIQC+=( "$var" ) + done + VIASH_PAR_FASTQC_RAW_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_FASTQC_RAW_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_FASTQC_TRIM_MULTIQC" ]; then + VIASH_TEST_FASTQC_TRIM_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_FASTQC_TRIM_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_FASTQC_TRIM_MULTIQC+=( "$var" ) + done + VIASH_PAR_FASTQC_TRIM_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_FASTQC_TRIM_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_TRIM_LOG_MULTIQC" ]; then + VIASH_TEST_TRIM_LOG_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_TRIM_LOG_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_TRIM_LOG_MULTIQC+=( "$var" ) + done + VIASH_PAR_TRIM_LOG_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_TRIM_LOG_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_SORTMERNA_MULTIQC" ]; then + VIASH_TEST_SORTMERNA_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_SORTMERNA_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_SORTMERNA_MULTIQC+=( "$var" ) + done + VIASH_PAR_SORTMERNA_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_SORTMERNA_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_STAR_MULTIQC" ]; then + VIASH_TEST_STAR_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_STAR_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_STAR_MULTIQC+=( "$var" ) + done + VIASH_PAR_STAR_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_STAR_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_SALMON_MULTIQC" ]; then + VIASH_TEST_SALMON_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_SALMON_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_SALMON_MULTIQC+=( "$var" ) + done + VIASH_PAR_SALMON_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_SALMON_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_SAMTOOLS_STATS" ]; then + VIASH_TEST_SAMTOOLS_STATS=() + IFS=',' + for var in $VIASH_PAR_SAMTOOLS_STATS; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_SAMTOOLS_STATS+=( "$var" ) + done + VIASH_PAR_SAMTOOLS_STATS=$(IFS=',' ; echo "${VIASH_TEST_SAMTOOLS_STATS[*]}") +fi +if [ ! -z "$VIASH_PAR_SAMTOOLS_FLAGSTAT" ]; then + VIASH_TEST_SAMTOOLS_FLAGSTAT=() + IFS=',' + for var in $VIASH_PAR_SAMTOOLS_FLAGSTAT; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_SAMTOOLS_FLAGSTAT+=( "$var" ) + done + VIASH_PAR_SAMTOOLS_FLAGSTAT=$(IFS=',' ; echo "${VIASH_TEST_SAMTOOLS_FLAGSTAT[*]}") +fi +if [ ! -z "$VIASH_PAR_SAMTOOLS_IDXSTATS" ]; then + VIASH_TEST_SAMTOOLS_IDXSTATS=() + IFS=',' + for var in $VIASH_PAR_SAMTOOLS_IDXSTATS; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_SAMTOOLS_IDXSTATS+=( "$var" ) + done + VIASH_PAR_SAMTOOLS_IDXSTATS=$(IFS=',' ; echo "${VIASH_TEST_SAMTOOLS_IDXSTATS[*]}") +fi +if [ ! -z "$VIASH_PAR_MARKDUPLICATES_MULTIQC" ]; then + VIASH_TEST_MARKDUPLICATES_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_MARKDUPLICATES_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_MARKDUPLICATES_MULTIQC+=( "$var" ) + done + VIASH_PAR_MARKDUPLICATES_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_MARKDUPLICATES_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_PSEUDO_MULTIQC" ]; then + VIASH_TEST_PSEUDO_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_PSEUDO_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_PSEUDO_MULTIQC+=( "$var" ) + done + VIASH_PAR_PSEUDO_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_PSEUDO_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + VIASH_TEST_FEATURECOUNTS_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_FEATURECOUNTS_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_FEATURECOUNTS_MULTIQC+=( "$var" ) + done + VIASH_PAR_FEATURECOUNTS_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_FEATURECOUNTS_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC+=( "$var" ) + done + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_ALIGNER_PCA_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_ALIGNER_PCA_MULTIQC")" ) + VIASH_PAR_ALIGNER_PCA_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_ALIGNER_PCA_MULTIQC") +fi +if [ ! -z "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC")" ) + VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC") +fi +if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC")" ) + VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC") +fi +if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC")" ) + VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC") +fi +if [ ! -z "$VIASH_PAR_PRESEQ_MULTIQC" ]; then + VIASH_TEST_PRESEQ_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_PRESEQ_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_PRESEQ_MULTIQC+=( "$var" ) + done + VIASH_PAR_PRESEQ_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_PRESEQ_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_MULTIQC" ]; then + VIASH_TEST_QUALIMAP_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_QUALIMAP_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_QUALIMAP_MULTIQC+=( "$var" ) + done + VIASH_PAR_QUALIMAP_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_QUALIMAP_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC=() + IFS=',' + for var in $VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC+=( "$var" ) + done + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC=$(IFS=',' ; echo "${VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC[*]}") +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=() + IFS=',' + for var in $VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+=( "$var" ) + done + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=$(IFS=',' ; echo "${VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC[*]}") +fi +if [ ! -z "$VIASH_PAR_BAMSTAT_MULTIQC" ]; then + VIASH_TEST_BAMSTAT_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_BAMSTAT_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_BAMSTAT_MULTIQC+=( "$var" ) + done + VIASH_PAR_BAMSTAT_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_BAMSTAT_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_INFEREXPERIMENT_MULTIQC" ]; then + VIASH_TEST_INFEREXPERIMENT_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_INFEREXPERIMENT_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_INFEREXPERIMENT_MULTIQC+=( "$var" ) + done + VIASH_PAR_INFEREXPERIMENT_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_INFEREXPERIMENT_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_INNERDISTANCE_MULTIQC" ]; then + VIASH_TEST_INNERDISTANCE_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_INNERDISTANCE_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_INNERDISTANCE_MULTIQC+=( "$var" ) + done + VIASH_PAR_INNERDISTANCE_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_INNERDISTANCE_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_JUNCTIONANNOTATION_MULTIQC" ]; then + VIASH_TEST_JUNCTIONANNOTATION_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_JUNCTIONANNOTATION_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_JUNCTIONANNOTATION_MULTIQC+=( "$var" ) + done + VIASH_PAR_JUNCTIONANNOTATION_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_JUNCTIONANNOTATION_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_JUNCTIONSATURATION_MULTIQC" ]; then + VIASH_TEST_JUNCTIONSATURATION_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_JUNCTIONSATURATION_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_JUNCTIONSATURATION_MULTIQC+=( "$var" ) + done + VIASH_PAR_JUNCTIONSATURATION_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_JUNCTIONSATURATION_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_READDISTRIBUTION_MULTIQC" ]; then + VIASH_TEST_READDISTRIBUTION_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_READDISTRIBUTION_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_READDISTRIBUTION_MULTIQC+=( "$var" ) + done + VIASH_PAR_READDISTRIBUTION_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_READDISTRIBUTION_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_READDUPLICATION_MULTIQC" ]; then + VIASH_TEST_READDUPLICATION_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_READDUPLICATION_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_READDUPLICATION_MULTIQC+=( "$var" ) + done + VIASH_PAR_READDUPLICATION_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_READDUPLICATION_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_TIN_MULTIQC" ]; then + VIASH_TEST_TIN_MULTIQC=() + IFS=',' + for var in $VIASH_PAR_TIN_MULTIQC; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_TIN_MULTIQC+=( "$var" ) + done + VIASH_PAR_TIN_MULTIQC=$(IFS=',' ; echo "${VIASH_TEST_TIN_MULTIQC[*]}") +fi +if [ ! -z "$VIASH_PAR_MULTIQC_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_MULTIQC_CONFIG")" ) + VIASH_PAR_MULTIQC_CONFIG=$(ViashDockerAutodetectMount "$VIASH_PAR_MULTIQC_CONFIG") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-prepare_multiqc_input-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_FAIL_TRIMMING_MULTIQC+x} ]; then echo "${VIASH_PAR_FAIL_TRIMMING_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_fail_trimming_multiqc='&'#" ; else echo "# par_fail_trimming_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FAIL_MAPPING_MULTIQC+x} ]; then echo "${VIASH_PAR_FAIL_MAPPING_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_fail_mapping_multiqc='&'#" ; else echo "# par_fail_mapping_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FAIL_STRAND_MULTIQC+x} ]; then echo "${VIASH_PAR_FAIL_STRAND_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_fail_strand_multiqc='&'#" ; else echo "# par_fail_strand_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_RAW_MULTIQC+x} ]; then echo "${VIASH_PAR_FASTQC_RAW_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc_raw_multiqc='&'#" ; else echo "# par_fastqc_raw_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_TRIM_MULTIQC+x} ]; then echo "${VIASH_PAR_FASTQC_TRIM_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc_trim_multiqc='&'#" ; else echo "# par_fastqc_trim_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_LOG_MULTIQC+x} ]; then echo "${VIASH_PAR_TRIM_LOG_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_trim_log_multiqc='&'#" ; else echo "# par_trim_log_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_SORTMERNA_MULTIQC+x} ]; then echo "${VIASH_PAR_SORTMERNA_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_sortmerna_multiqc='&'#" ; else echo "# par_sortmerna_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_MULTIQC+x} ]; then echo "${VIASH_PAR_STAR_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_star_multiqc='&'#" ; else echo "# par_star_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_SALMON_MULTIQC+x} ]; then echo "${VIASH_PAR_SALMON_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_salmon_multiqc='&'#" ; else echo "# par_salmon_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMTOOLS_STATS+x} ]; then echo "${VIASH_PAR_SAMTOOLS_STATS}" | sed "s#'#'\"'\"'#g;s#.*#par_samtools_stats='&'#" ; else echo "# par_samtools_stats="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMTOOLS_FLAGSTAT+x} ]; then echo "${VIASH_PAR_SAMTOOLS_FLAGSTAT}" | sed "s#'#'\"'\"'#g;s#.*#par_samtools_flagstat='&'#" ; else echo "# par_samtools_flagstat="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMTOOLS_IDXSTATS+x} ]; then echo "${VIASH_PAR_SAMTOOLS_IDXSTATS}" | sed "s#'#'\"'\"'#g;s#.*#par_samtools_idxstats='&'#" ; else echo "# par_samtools_idxstats="; fi ) +$( if [ ! -z ${VIASH_PAR_MARKDUPLICATES_MULTIQC+x} ]; then echo "${VIASH_PAR_MARKDUPLICATES_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_markduplicates_multiqc='&'#" ; else echo "# par_markduplicates_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PSEUDO_MULTIQC+x} ]; then echo "${VIASH_PAR_PSEUDO_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_pseudo_multiqc='&'#" ; else echo "# par_pseudo_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_MULTIQC+x} ]; then echo "${VIASH_PAR_FEATURECOUNTS_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_featurecounts_multiqc='&'#" ; else echo "# par_featurecounts_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC+x} ]; then echo "${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_featurecounts_rrna_multiqc='&'#" ; else echo "# par_featurecounts_rrna_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_ALIGNER_PCA_MULTIQC+x} ]; then echo "${VIASH_PAR_ALIGNER_PCA_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_aligner_pca_multiqc='&'#" ; else echo "# par_aligner_pca_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC+x} ]; then echo "${VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_aligner_clustering_multiqc='&'#" ; else echo "# par_aligner_clustering_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_pseudo_aligner_pca_multiqc='&'#" ; else echo "# par_pseudo_aligner_pca_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_pseudo_aligner_clustering_multiqc='&'#" ; else echo "# par_pseudo_aligner_clustering_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PRESEQ_MULTIQC+x} ]; then echo "${VIASH_PAR_PRESEQ_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_preseq_multiqc='&'#" ; else echo "# par_preseq_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALIMAP_MULTIQC+x} ]; then echo "${VIASH_PAR_QUALIMAP_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_qualimap_multiqc='&'#" ; else echo "# par_qualimap_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC+x} ]; then echo "${VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC}" | sed "s#'#'\"'\"'#g;s#.*#par_dupradar_output_dup_intercept_mqc='&'#" ; else echo "# par_dupradar_output_dup_intercept_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+x} ]; then echo "${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC}" | sed "s#'#'\"'\"'#g;s#.*#par_dupradar_output_duprate_exp_denscurve_mqc='&'#" ; else echo "# par_dupradar_output_duprate_exp_denscurve_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_BAMSTAT_MULTIQC+x} ]; then echo "${VIASH_PAR_BAMSTAT_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_bamstat_multiqc='&'#" ; else echo "# par_bamstat_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_INFEREXPERIMENT_MULTIQC+x} ]; then echo "${VIASH_PAR_INFEREXPERIMENT_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_inferexperiment_multiqc='&'#" ; else echo "# par_inferexperiment_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_INNERDISTANCE_MULTIQC+x} ]; then echo "${VIASH_PAR_INNERDISTANCE_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_innerdistance_multiqc='&'#" ; else echo "# par_innerdistance_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_JUNCTIONANNOTATION_MULTIQC+x} ]; then echo "${VIASH_PAR_JUNCTIONANNOTATION_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_junctionannotation_multiqc='&'#" ; else echo "# par_junctionannotation_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_JUNCTIONSATURATION_MULTIQC+x} ]; then echo "${VIASH_PAR_JUNCTIONSATURATION_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_junctionsaturation_multiqc='&'#" ; else echo "# par_junctionsaturation_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_READDISTRIBUTION_MULTIQC+x} ]; then echo "${VIASH_PAR_READDISTRIBUTION_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_readdistribution_multiqc='&'#" ; else echo "# par_readdistribution_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_READDUPLICATION_MULTIQC+x} ]; then echo "${VIASH_PAR_READDUPLICATION_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_readduplication_multiqc='&'#" ; else echo "# par_readduplication_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_TIN_MULTIQC+x} ]; then echo "${VIASH_PAR_TIN_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_tin_multiqc='&'#" ; else echo "# par_tin_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_MULTIQC_CONFIG+x} ]; then echo "${VIASH_PAR_MULTIQC_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#par_multiqc_config='&'#" ; else echo "# par_multiqc_config="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \$par_output + +echo \$par_fail_trimming_multiqc > \$par_output/fail_trimming_mqc.tsv +echo \$par_fail_mapping_multiqc > \$par_output/fail_mapping_mqc.tsv +echo \$par_fail_strand_multiqc > \$par_output/fail_strand_mqc.tsv + +IFS="," read -ra fastqc_raw_multiqc <<< \$par_fastqc_raw_multiqc && for file in "\${fastqc_raw_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra fastqc_trim_multiqc <<< \$par_fastqc_trim_multiqc && for file in "\${fastqc_trim_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra trim_log_multiqc <<< \$par_trim_log_multiqc && for file in "\${trim_log_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra sortmerna_multiqc <<< \$par_sortmerna_multiqc && for file in "\${sortmerna_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra star_multiqc <<< \$par_star_multiqc && for file in "\${star_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +# IFS="," read -ra hisat2_multiqc <<< \$par_hisat2_multiqc && for file in "\${hisat2_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra rsem_multiqc <<< \$par_rsem_multiqc && for file in "\${rsem_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra salmon_multiqc <<< \$par_salmon_multiqc && for file in "\${salmon_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra pseudo_multiqc <<< \$par_pseudo_multiqc && for file in "\${pseudo_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra samtools_stats <<< \$par_samtools_stats && for file in "\${samtools_stats[@]}"; do [ -e "\$file" ] && cp -r "\$file" \$par_output/; done + +IFS="," read -ra samtools_flagstat <<< \$par_samtools_flagstat && for file in "\${samtools_flagstat[@]}"; do [ -e "\$file" ] && cp -r "\$file" \$par_output/; done + +IFS="," read -ra samtools_idxstats <<< \$par_samtools_idxstats && for file in "\${samtools_idxstats[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra markduplicates_multiqc <<< \$par_markduplicates_multiqc && for file in "\${markduplicates_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra featurecounts_multiqc <<< \$par_featurecounts_multiqc && for file in "\${featurecounts_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra featurecounts_rrna_multiqc <<< \$par_featurecounts_rrna_multiqc && for file in "\${featurecounts_rrna_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +[ -e "\$par_aligner_pca_multiqc" ] && cp -r "\$par_aligner_pca_multiqc" "\$par_output/" + +[ -e "\$par_aligner_clustering_multiqc" ] && cp -r \$par_aligner_clustering_multiqc "\$par_output/" + +[ -e "\$par_pseudo_aligner_pca_multiqc" ] && cp -r \$par_pseudo_aligner_pca_multiqc "\$par_output/" + +[ -e "\$par_pseudo_aligner_clustering_multiqc" ] && cp -r \$par_pseudo_aligner_clustering_multiqc "\$par_output/" + +IFS="," read -ra preseq_multiqc <<< \$par_preseq_multiqc && for file in "\${preseq_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra qualimap_multiqc <<< \$par_qualimap_multiqc && for file in "\${qualimap_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra dupradar_output_dup_intercept_mqc <<< \$par_dupradar_output_dup_intercept_mqc && for file in "\${dupradar_output_dup_intercept_mqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra dupradar_output_duprate_exp_denscurve_mqc <<< \$par_dupradar_output_duprate_exp_denscurve_mqc && for file in "\${dupradar_output_duprate_exp_denscurve_mqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra bamstat_multiqc <<< \$par_bamstat_multiqc && for file in "\${bamstat_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra inferexperiment_multiqc <<< \$par_inferexperiment_multiqc && for file in "\${inferexperiment_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra innerdistance_multiqc <<< \$par_innerdistance_multiqc && for file in "\${innerdistance_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra junctionannotation_multiqc <<< \$par_junctionannotation_multiqc && for file in "\${junctionannotation_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra junctionsaturation_multiqc <<< \$par_junctionsaturation_multiqc && for file in "\${junctionsaturation_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra readdistribution_multiqc <<< \$par_readdistribution_multiqc && for file in "\${readdistribution_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra readduplication_multiqc <<< \$par_readduplication_multiqc && for file in "\${readduplication_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +IFS="," read -ra tin_multiqc <<< \$par_tin_multiqc && for file in "\${tin_multiqc[@]}"; do [ -e "\$file" ] && cp -r "\$file" "\$par_output/"; done + +echo "Checking for custom multiqc_config" +# If the variable is empty, we use the default one (registered as a resource) +if [ -z \$par_multiqc_config ]; then + echo "No multiqc_config provided, using the default" + cp \$meta_resources_dir/multiqc_config.yml "\$par_output" +else + echo "Optional file provided" + if [ -f \$par_multiqc_config ]; then + cp \$par_multiqc_config "\$par_output"/multiqc_config.yml + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_FASTQC_RAW_MULTIQC" ]; then + unset VIASH_TEST_FASTQC_RAW_MULTIQC + IFS=',' + for var in $VIASH_PAR_FASTQC_RAW_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_FASTQC_RAW_MULTIQC" ]; then + VIASH_TEST_FASTQC_RAW_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_FASTQC_RAW_MULTIQC="$VIASH_TEST_FASTQC_RAW_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_FASTQC_RAW_MULTIQC="$VIASH_TEST_FASTQC_RAW_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_FASTQC_TRIM_MULTIQC" ]; then + unset VIASH_TEST_FASTQC_TRIM_MULTIQC + IFS=',' + for var in $VIASH_PAR_FASTQC_TRIM_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_FASTQC_TRIM_MULTIQC" ]; then + VIASH_TEST_FASTQC_TRIM_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_FASTQC_TRIM_MULTIQC="$VIASH_TEST_FASTQC_TRIM_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_FASTQC_TRIM_MULTIQC="$VIASH_TEST_FASTQC_TRIM_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_TRIM_LOG_MULTIQC" ]; then + unset VIASH_TEST_TRIM_LOG_MULTIQC + IFS=',' + for var in $VIASH_PAR_TRIM_LOG_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_TRIM_LOG_MULTIQC" ]; then + VIASH_TEST_TRIM_LOG_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_TRIM_LOG_MULTIQC="$VIASH_TEST_TRIM_LOG_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_TRIM_LOG_MULTIQC="$VIASH_TEST_TRIM_LOG_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_SORTMERNA_MULTIQC" ]; then + unset VIASH_TEST_SORTMERNA_MULTIQC + IFS=',' + for var in $VIASH_PAR_SORTMERNA_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_SORTMERNA_MULTIQC" ]; then + VIASH_TEST_SORTMERNA_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_SORTMERNA_MULTIQC="$VIASH_TEST_SORTMERNA_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_SORTMERNA_MULTIQC="$VIASH_TEST_SORTMERNA_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_STAR_MULTIQC" ]; then + unset VIASH_TEST_STAR_MULTIQC + IFS=',' + for var in $VIASH_PAR_STAR_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_STAR_MULTIQC" ]; then + VIASH_TEST_STAR_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_STAR_MULTIQC="$VIASH_TEST_STAR_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_STAR_MULTIQC="$VIASH_TEST_STAR_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_SALMON_MULTIQC" ]; then + unset VIASH_TEST_SALMON_MULTIQC + IFS=',' + for var in $VIASH_PAR_SALMON_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_SALMON_MULTIQC" ]; then + VIASH_TEST_SALMON_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_SALMON_MULTIQC="$VIASH_TEST_SALMON_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_SALMON_MULTIQC="$VIASH_TEST_SALMON_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_SAMTOOLS_STATS" ]; then + unset VIASH_TEST_SAMTOOLS_STATS + IFS=',' + for var in $VIASH_PAR_SAMTOOLS_STATS; do + unset IFS + if [ -z "$VIASH_TEST_SAMTOOLS_STATS" ]; then + VIASH_TEST_SAMTOOLS_STATS="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_SAMTOOLS_STATS="$VIASH_TEST_SAMTOOLS_STATS,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_SAMTOOLS_STATS="$VIASH_TEST_SAMTOOLS_STATS" + fi + if [ ! -z "$VIASH_PAR_SAMTOOLS_FLAGSTAT" ]; then + unset VIASH_TEST_SAMTOOLS_FLAGSTAT + IFS=',' + for var in $VIASH_PAR_SAMTOOLS_FLAGSTAT; do + unset IFS + if [ -z "$VIASH_TEST_SAMTOOLS_FLAGSTAT" ]; then + VIASH_TEST_SAMTOOLS_FLAGSTAT="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_SAMTOOLS_FLAGSTAT="$VIASH_TEST_SAMTOOLS_FLAGSTAT,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_SAMTOOLS_FLAGSTAT="$VIASH_TEST_SAMTOOLS_FLAGSTAT" + fi + if [ ! -z "$VIASH_PAR_SAMTOOLS_IDXSTATS" ]; then + unset VIASH_TEST_SAMTOOLS_IDXSTATS + IFS=',' + for var in $VIASH_PAR_SAMTOOLS_IDXSTATS; do + unset IFS + if [ -z "$VIASH_TEST_SAMTOOLS_IDXSTATS" ]; then + VIASH_TEST_SAMTOOLS_IDXSTATS="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_SAMTOOLS_IDXSTATS="$VIASH_TEST_SAMTOOLS_IDXSTATS,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_SAMTOOLS_IDXSTATS="$VIASH_TEST_SAMTOOLS_IDXSTATS" + fi + if [ ! -z "$VIASH_PAR_MARKDUPLICATES_MULTIQC" ]; then + unset VIASH_TEST_MARKDUPLICATES_MULTIQC + IFS=',' + for var in $VIASH_PAR_MARKDUPLICATES_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_MARKDUPLICATES_MULTIQC" ]; then + VIASH_TEST_MARKDUPLICATES_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_MARKDUPLICATES_MULTIQC="$VIASH_TEST_MARKDUPLICATES_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_MARKDUPLICATES_MULTIQC="$VIASH_TEST_MARKDUPLICATES_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_PSEUDO_MULTIQC" ]; then + unset VIASH_TEST_PSEUDO_MULTIQC + IFS=',' + for var in $VIASH_PAR_PSEUDO_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_PSEUDO_MULTIQC" ]; then + VIASH_TEST_PSEUDO_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_PSEUDO_MULTIQC="$VIASH_TEST_PSEUDO_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_PSEUDO_MULTIQC="$VIASH_TEST_PSEUDO_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ]; then + unset VIASH_TEST_FEATURECOUNTS_MULTIQC + IFS=',' + for var in $VIASH_PAR_FEATURECOUNTS_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_FEATURECOUNTS_MULTIQC" ]; then + VIASH_TEST_FEATURECOUNTS_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_FEATURECOUNTS_MULTIQC="$VIASH_TEST_FEATURECOUNTS_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_FEATURECOUNTS_MULTIQC="$VIASH_TEST_FEATURECOUNTS_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ]; then + unset VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC + IFS=',' + for var in $VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC" ]; then + VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC="$VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="$VIASH_TEST_FEATURECOUNTS_RRNA_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_ALIGNER_PCA_MULTIQC" ]; then + VIASH_PAR_ALIGNER_PCA_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_ALIGNER_PCA_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC" ]; then + VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC" ]; then + VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC" ]; then + VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_PRESEQ_MULTIQC" ]; then + unset VIASH_TEST_PRESEQ_MULTIQC + IFS=',' + for var in $VIASH_PAR_PRESEQ_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_PRESEQ_MULTIQC" ]; then + VIASH_TEST_PRESEQ_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_PRESEQ_MULTIQC="$VIASH_TEST_PRESEQ_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_PRESEQ_MULTIQC="$VIASH_TEST_PRESEQ_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_QUALIMAP_MULTIQC" ]; then + unset VIASH_TEST_QUALIMAP_MULTIQC + IFS=',' + for var in $VIASH_PAR_QUALIMAP_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_QUALIMAP_MULTIQC" ]; then + VIASH_TEST_QUALIMAP_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_QUALIMAP_MULTIQC="$VIASH_TEST_QUALIMAP_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_QUALIMAP_MULTIQC="$VIASH_TEST_QUALIMAP_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + unset VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC + IFS=',' + for var in $VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC; do + unset IFS + if [ -z "$VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$VIASH_TEST_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" + fi + if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + unset VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC + IFS=',' + for var in $VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC; do + unset IFS + if [ -z "$VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$VIASH_TEST_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" + fi + if [ ! -z "$VIASH_PAR_BAMSTAT_MULTIQC" ]; then + unset VIASH_TEST_BAMSTAT_MULTIQC + IFS=',' + for var in $VIASH_PAR_BAMSTAT_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_BAMSTAT_MULTIQC" ]; then + VIASH_TEST_BAMSTAT_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_BAMSTAT_MULTIQC="$VIASH_TEST_BAMSTAT_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_BAMSTAT_MULTIQC="$VIASH_TEST_BAMSTAT_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_INFEREXPERIMENT_MULTIQC" ]; then + unset VIASH_TEST_INFEREXPERIMENT_MULTIQC + IFS=',' + for var in $VIASH_PAR_INFEREXPERIMENT_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_INFEREXPERIMENT_MULTIQC" ]; then + VIASH_TEST_INFEREXPERIMENT_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_INFEREXPERIMENT_MULTIQC="$VIASH_TEST_INFEREXPERIMENT_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_INFEREXPERIMENT_MULTIQC="$VIASH_TEST_INFEREXPERIMENT_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_INNERDISTANCE_MULTIQC" ]; then + unset VIASH_TEST_INNERDISTANCE_MULTIQC + IFS=',' + for var in $VIASH_PAR_INNERDISTANCE_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_INNERDISTANCE_MULTIQC" ]; then + VIASH_TEST_INNERDISTANCE_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_INNERDISTANCE_MULTIQC="$VIASH_TEST_INNERDISTANCE_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_INNERDISTANCE_MULTIQC="$VIASH_TEST_INNERDISTANCE_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_JUNCTIONANNOTATION_MULTIQC" ]; then + unset VIASH_TEST_JUNCTIONANNOTATION_MULTIQC + IFS=',' + for var in $VIASH_PAR_JUNCTIONANNOTATION_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_JUNCTIONANNOTATION_MULTIQC" ]; then + VIASH_TEST_JUNCTIONANNOTATION_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_JUNCTIONANNOTATION_MULTIQC="$VIASH_TEST_JUNCTIONANNOTATION_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_JUNCTIONANNOTATION_MULTIQC="$VIASH_TEST_JUNCTIONANNOTATION_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_JUNCTIONSATURATION_MULTIQC" ]; then + unset VIASH_TEST_JUNCTIONSATURATION_MULTIQC + IFS=',' + for var in $VIASH_PAR_JUNCTIONSATURATION_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_JUNCTIONSATURATION_MULTIQC" ]; then + VIASH_TEST_JUNCTIONSATURATION_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_JUNCTIONSATURATION_MULTIQC="$VIASH_TEST_JUNCTIONSATURATION_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_JUNCTIONSATURATION_MULTIQC="$VIASH_TEST_JUNCTIONSATURATION_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_READDISTRIBUTION_MULTIQC" ]; then + unset VIASH_TEST_READDISTRIBUTION_MULTIQC + IFS=',' + for var in $VIASH_PAR_READDISTRIBUTION_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_READDISTRIBUTION_MULTIQC" ]; then + VIASH_TEST_READDISTRIBUTION_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_READDISTRIBUTION_MULTIQC="$VIASH_TEST_READDISTRIBUTION_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_READDISTRIBUTION_MULTIQC="$VIASH_TEST_READDISTRIBUTION_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_READDUPLICATION_MULTIQC" ]; then + unset VIASH_TEST_READDUPLICATION_MULTIQC + IFS=',' + for var in $VIASH_PAR_READDUPLICATION_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_READDUPLICATION_MULTIQC" ]; then + VIASH_TEST_READDUPLICATION_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_READDUPLICATION_MULTIQC="$VIASH_TEST_READDUPLICATION_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_READDUPLICATION_MULTIQC="$VIASH_TEST_READDUPLICATION_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_TIN_MULTIQC" ]; then + unset VIASH_TEST_TIN_MULTIQC + IFS=',' + for var in $VIASH_PAR_TIN_MULTIQC; do + unset IFS + if [ -z "$VIASH_TEST_TIN_MULTIQC" ]; then + VIASH_TEST_TIN_MULTIQC="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_TIN_MULTIQC="$VIASH_TEST_TIN_MULTIQC,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_TIN_MULTIQC="$VIASH_TEST_TIN_MULTIQC" + fi + if [ ! -z "$VIASH_PAR_MULTIQC_CONFIG" ]; then + VIASH_PAR_MULTIQC_CONFIG=$(ViashDockerStripAutomount "$VIASH_PAR_MULTIQC_CONFIG") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml b/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml new file mode 100644 index 0000000..8519d40 --- /dev/null +++ b/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml @@ -0,0 +1,178 @@ +name: "preprocess_transcripts_fasta" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--transcript_fasta" + description: "Path of transcripts FASTA file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Path of processed output FASTA file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Process transcripts FASTA if GTF file is GENOCODE format\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "transcriptome.fasta" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/preprocess_transcripts_fasta_gencode.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/preprocess_transcripts_fasta/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/preprocess_transcripts_fasta" + executable: "target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/preprocess_transcripts_fasta/nextflow_labels.config b/target/executable/preprocess_transcripts_fasta/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/preprocess_transcripts_fasta/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta b/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta new file mode 100755 index 0000000..0101402 --- /dev/null +++ b/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta @@ -0,0 +1,1106 @@ +#!/usr/bin/env bash + +# preprocess_transcripts_fasta v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="preprocess_transcripts_fasta" +VIASH_META_FUNCTIONALITY_NAME="preprocess_transcripts_fasta" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component preprocess_transcripts_fasta" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "preprocess_transcripts_fasta v0.3.0" + echo "" + echo "Process transcripts FASTA if GTF file is GENOCODE format" + echo "" + echo "Input:" + echo " --transcript_fasta" + echo " type: file, required parameter, file must exist" + echo " Path of transcripts FASTA file" + echo "" + echo "Output:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Path of processed output FASTA file." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "preprocess_transcripts_fasta v0.3.0" + exit + ;; + --transcript_fasta) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_fasta=*) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta=*\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/preprocess_transcripts_fasta:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_TRANSCRIPT_FASTA+x} ]; then + ViashError '--transcript_fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPT_FASTA' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRANSCRIPT_FASTA")" ) + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashDockerAutodetectMount "$VIASH_PAR_TRANSCRIPT_FASTA") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-preprocess_transcripts_fasta-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_TRANSCRIPT_FASTA+x} ]; then echo "${VIASH_PAR_TRANSCRIPT_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_transcript_fasta='&'#" ; else echo "# par_transcript_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +filename="\$(basename -- "\$par_transcript_fasta")" + +if [ \${filename##*.} == "gz" ]; then + zcat \$par_transcript_fasta | cut -d "|" -f1 > \$par_output +else + cat \$par_transcript_fasta | cut -d "|" -f1 > \$par_output +fi +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashDockerStripAutomount "$VIASH_PAR_TRANSCRIPT_FASTA") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/preseq_lcextrap/.config.vsh.yaml b/target/executable/preseq_lcextrap/.config.vsh.yaml new file mode 100644 index 0000000..9394d37 --- /dev/null +++ b/target/executable/preseq_lcextrap/.config.vsh.yaml @@ -0,0 +1,231 @@ +name: "preseq_lcextrap" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Input genome BAM/BED file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_preseq_args" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "Paired-end reads?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "$id.lc_extrap.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Computing the expected future yield of distinct reads and bounds on\ + \ the number of total distinct reads in the library and the associated confidence\ + \ intervals." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "a.sorted.bed" +- type: "file" + path: "SRR1106616_5M_subset.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/preseq/lcextrap/main.nf" + - "modules/nf-core/preseq/lcextrap/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "curl" + - "bzip2" + - "build-essential" + - "wget" + - "gcc" + - "autoconf" + - "automake" + - "make" + - "libz-dev" + - "libbz2-dev" + - "zlib1g-dev" + - "libncurses5-dev" + - "libncursesw5-dev" + - "liblzma-dev" + - "pip" + interactive: false + - type: "docker" + run: + - "cd /usr/bin && \\\nwget --no-check-certificate https://github.com/smithlabcode/preseq/releases/download/v3.2.0/preseq-3.2.0.tar.gz\ + \ && \\\nwget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2\ + \ && \\\nwget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static\ + \ && \\\ncurl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2\ + \ -o samtools-1.18.tar.bz2 && \\\ntar -xjf samtools-1.18.tar.bz2 && rm samtools-1.18.tar.bz2\ + \ && \\\ntar -xzf preseq-3.2.0.tar.gz && rm preseq-3.2.0.tar.gz && \\\ntar -vxjf\ + \ htslib-1.9.tar.bz2 && rm htslib-1.9.tar.bz2 && \\\nmv bedtools.static /usr/local/bin/bedtools\ + \ && \\\nchmod a+x /usr/local/bin/bedtools && \\\ncd samtools-1.18 && \\\n./configure\ + \ && \\\nmake && \\\nmake install && \\\ncd /usr/bin && cd htslib-1.9 && \\\n\ + make && \\\ncd /usr/bin && cd preseq-3.2.0 && \\\nmkdir build && cd build &&\ + \ \\\n../configure && \\\nmake && make install && make HAVE_HTSLIB=1 all \n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/preseq_lcextrap/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/preseq_lcextrap" + executable: "target/executable/preseq_lcextrap/preseq_lcextrap" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/preseq_lcextrap/nextflow_labels.config b/target/executable/preseq_lcextrap/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/preseq_lcextrap/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/preseq_lcextrap/preseq_lcextrap b/target/executable/preseq_lcextrap/preseq_lcextrap new file mode 100755 index 0000000..03e2c43 --- /dev/null +++ b/target/executable/preseq_lcextrap/preseq_lcextrap @@ -0,0 +1,1184 @@ +#!/usr/bin/env bash + +# preseq_lcextrap v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="preseq_lcextrap" +VIASH_META_FUNCTIONALITY_NAME="preseq_lcextrap" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y curl bzip2 build-essential wget gcc autoconf automake make libz-dev libbz2-dev zlib1g-dev libncurses5-dev libncursesw5-dev liblzma-dev pip && \ + rm -rf /var/lib/apt/lists/* + +RUN cd /usr/bin && \ +wget --no-check-certificate https://github.com/smithlabcode/preseq/releases/download/v3.2.0/preseq-3.2.0.tar.gz && \ +wget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && \ +wget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \ +curl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 -o samtools-1.18.tar.bz2 && \ +tar -xjf samtools-1.18.tar.bz2 && rm samtools-1.18.tar.bz2 && \ +tar -xzf preseq-3.2.0.tar.gz && rm preseq-3.2.0.tar.gz && \ +tar -vxjf htslib-1.9.tar.bz2 && rm htslib-1.9.tar.bz2 && \ +mv bedtools.static /usr/local/bin/bedtools && \ +chmod a+x /usr/local/bin/bedtools && \ +cd samtools-1.18 && \ +./configure && \ +make && \ +make install && \ +cd /usr/bin && cd htslib-1.9 && \ +make && \ +cd /usr/bin && cd preseq-3.2.0 && \ +mkdir build && cd build && \ +../configure && \ +make && make install && make HAVE_HTSLIB=1 all + +LABEL org.opencontainers.image.description="Companion container for running component preseq_lcextrap" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "preseq_lcextrap v0.3.0" + echo "" + echo "Computing the expected future yield of distinct reads and bounds on the number" + echo "of total distinct reads in the library and the associated confidence intervals." + echo "" + echo "Input:" + echo " --input" + echo " type: file, file must exist" + echo " Input genome BAM/BED file" + echo "" + echo " --extra_preseq_args" + echo " type: string" + echo "" + echo " --paired" + echo " type: boolean" + echo " Paired-end reads?" + echo "" + echo "Output:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: \$id.lc_extrap.txt" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "preseq_lcextrap v0.3.0" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_preseq_args) + [ -n "$VIASH_PAR_EXTRA_PRESEQ_ARGS" ] && ViashError Bad arguments for option \'--extra_preseq_args\': \'$VIASH_PAR_EXTRA_PRESEQ_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PRESEQ_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_preseq_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_preseq_args=*) + [ -n "$VIASH_PAR_EXTRA_PRESEQ_ARGS" ] && ViashError Bad arguments for option \'--extra_preseq_args=*\': \'$VIASH_PAR_EXTRA_PRESEQ_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PRESEQ_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --paired) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --paired=*) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/preseq_lcextrap:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="\$id.lc_extrap.txt" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_PAIRED" ]]; then + if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-preseq_lcextrap-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_PRESEQ_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_PRESEQ_ARGS}" | sed "s#'#'\"'\"'#g;s#.*#par_extra_preseq_args='&'#" ; else echo "# par_extra_preseq_args="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +file=\$(basename -- "\$par_input") +filename="\${file%.*}" + +if [ "\${file##*.}" == "bam" ]; then + samtools sort -o sorted_\$filename.bam -n \$par_input + bedtools bamtobed -i sorted_\$filename.bam > \$filename.bed + bedtools sort -i \$filename.bed > sorted_\$filename.bed +elif [ "\${file##*.}" == "bed" ]; then + bedtools sort -i \$par_input > sorted_\$filename.bed +else + echo "Invalid input file format!" + exit 1 +fi + +if \$par_paired; then + paired="-pe" +else + paired="" +fi + +preseq lc_extrap \\ + sorted_\$filename.bed \\ + \$paired \\ + \$par_extra_preseq_args \\ + -o \$par_output +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/rsem_merge_counts/.config.vsh.yaml b/target/executable/rsem_merge_counts/.config.vsh.yaml new file mode 100644 index 0000000..558c0d5 --- /dev/null +++ b/target/executable/rsem_merge_counts/.config.vsh.yaml @@ -0,0 +1,221 @@ +name: "rsem_merge_counts" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--counts_gene" + description: "Expression counts on gene level (genes)" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcripts" + description: "Expression counts on transcript level (isoforms)" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--merged_gene_counts" + description: "File containing gene counts across all samples." + info: null + default: + - "rsem.merged.gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_gene_tpm" + description: "File containing gene TPM across all samples." + info: null + default: + - "rsem.merged.gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_transcript_counts" + description: "File containing transcript counts across all samples." + info: null + default: + - "rsem.merged.transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_transcript_tpm" + description: "File containing transcript TPM across all samples." + info: null + default: + - "rsem.merged.transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Merge the transcript quantification results obtained from rsem calculate-expression\ + \ across all samples.\n" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/rsem_merge_counts/main.nf" + last_sha: "311279532694ce7520164ce4d65a388c0cd11f60" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rsem_merge_counts/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/rsem_merge_counts" + executable: "target/executable/rsem_merge_counts/rsem_merge_counts" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/rsem_merge_counts/nextflow_labels.config b/target/executable/rsem_merge_counts/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/rsem_merge_counts/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/rsem_merge_counts/rsem_merge_counts b/target/executable/rsem_merge_counts/rsem_merge_counts new file mode 100755 index 0000000..4f0f8af --- /dev/null +++ b/target/executable/rsem_merge_counts/rsem_merge_counts @@ -0,0 +1,1254 @@ +#!/usr/bin/env bash + +# rsem_merge_counts v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="rsem_merge_counts" +VIASH_META_FUNCTIONALITY_NAME="rsem_merge_counts" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component rsem_merge_counts" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rsem_merge_counts v0.3.0" + echo "" + echo "Merge the transcript quantification results obtained from rsem" + echo "calculate-expression across all samples." + echo "" + echo "Input:" + echo " --counts_gene" + echo " type: file, file must exist" + echo " Expression counts on gene level (genes)" + echo "" + echo " --counts_transcripts" + echo " type: file, file must exist" + echo " Expression counts on transcript level (isoforms)" + echo "" + echo "Output:" + echo " --merged_gene_counts" + echo " type: file, output, file must exist" + echo " default: rsem.merged.gene_counts.tsv" + echo " File containing gene counts across all samples." + echo "" + echo " --merged_gene_tpm" + echo " type: file, output, file must exist" + echo " default: rsem.merged.gene_tpm.tsv" + echo " File containing gene TPM across all samples." + echo "" + echo " --merged_transcript_counts" + echo " type: file, output, file must exist" + echo " default: rsem.merged.transcript_counts.tsv" + echo " File containing transcript counts across all samples." + echo "" + echo " --merged_transcript_tpm" + echo " type: file, output, file must exist" + echo " default: rsem.merged.transcript_tpm.tsv" + echo " File containing transcript TPM across all samples." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "rsem_merge_counts v0.3.0" + exit + ;; + --counts_gene) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene=*) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_transcripts) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--counts_transcripts\': \'$VIASH_PAR_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcripts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_transcripts=*) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--counts_transcripts=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --merged_gene_counts) + [ -n "$VIASH_PAR_MERGED_GENE_COUNTS" ] && ViashError Bad arguments for option \'--merged_gene_counts\': \'$VIASH_PAR_MERGED_GENE_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_GENE_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --merged_gene_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --merged_gene_counts=*) + [ -n "$VIASH_PAR_MERGED_GENE_COUNTS" ] && ViashError Bad arguments for option \'--merged_gene_counts=*\': \'$VIASH_PAR_MERGED_GENE_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_GENE_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --merged_gene_tpm) + [ -n "$VIASH_PAR_MERGED_GENE_TPM" ] && ViashError Bad arguments for option \'--merged_gene_tpm\': \'$VIASH_PAR_MERGED_GENE_TPM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_GENE_TPM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --merged_gene_tpm. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --merged_gene_tpm=*) + [ -n "$VIASH_PAR_MERGED_GENE_TPM" ] && ViashError Bad arguments for option \'--merged_gene_tpm=*\': \'$VIASH_PAR_MERGED_GENE_TPM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_GENE_TPM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --merged_transcript_counts) + [ -n "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ] && ViashError Bad arguments for option \'--merged_transcript_counts\': \'$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_TRANSCRIPT_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --merged_transcript_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --merged_transcript_counts=*) + [ -n "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ] && ViashError Bad arguments for option \'--merged_transcript_counts=*\': \'$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_TRANSCRIPT_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --merged_transcript_tpm) + [ -n "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ] && ViashError Bad arguments for option \'--merged_transcript_tpm\': \'$VIASH_PAR_MERGED_TRANSCRIPT_TPM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_TRANSCRIPT_TPM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --merged_transcript_tpm. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --merged_transcript_tpm=*) + [ -n "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ] && ViashError Bad arguments for option \'--merged_transcript_tpm=*\': \'$VIASH_PAR_MERGED_TRANSCRIPT_TPM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_TRANSCRIPT_TPM=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rsem_merge_counts:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MERGED_GENE_COUNTS+x} ]; then + VIASH_PAR_MERGED_GENE_COUNTS="rsem.merged.gene_counts.tsv" +fi +if [ -z ${VIASH_PAR_MERGED_GENE_TPM+x} ]; then + VIASH_PAR_MERGED_GENE_TPM="rsem.merged.gene_tpm.tsv" +fi +if [ -z ${VIASH_PAR_MERGED_TRANSCRIPT_COUNTS+x} ]; then + VIASH_PAR_MERGED_TRANSCRIPT_COUNTS="rsem.merged.transcript_counts.tsv" +fi +if [ -z ${VIASH_PAR_MERGED_TRANSCRIPT_TPM+x} ]; then + VIASH_PAR_MERGED_TRANSCRIPT_TPM="rsem.merged.transcript_tpm.tsv" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE" ]; then + ViashError "Input file '$VIASH_PAR_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPTS" ]; then + ViashError "Input file '$VIASH_PAR_COUNTS_TRANSCRIPTS' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_MERGED_GENE_COUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_MERGED_GENE_COUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MERGED_GENE_COUNTS")" +fi +if [ ! -z "$VIASH_PAR_MERGED_GENE_TPM" ] && [ ! -d "$(dirname "$VIASH_PAR_MERGED_GENE_TPM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MERGED_GENE_TPM")" +fi +if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS")" +fi +if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ] && [ ! -d "$(dirname "$VIASH_PAR_MERGED_TRANSCRIPT_TPM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MERGED_TRANSCRIPT_TPM")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE")" ) + VIASH_PAR_COUNTS_GENE=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE") +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_TRANSCRIPTS")" ) + VIASH_PAR_COUNTS_TRANSCRIPTS=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_TRANSCRIPTS") +fi +if [ ! -z "$VIASH_PAR_MERGED_GENE_COUNTS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_MERGED_GENE_COUNTS")" ) + VIASH_PAR_MERGED_GENE_COUNTS=$(ViashDockerAutodetectMount "$VIASH_PAR_MERGED_GENE_COUNTS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_MERGED_GENE_COUNTS" ) +fi +if [ ! -z "$VIASH_PAR_MERGED_GENE_TPM" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_MERGED_GENE_TPM")" ) + VIASH_PAR_MERGED_GENE_TPM=$(ViashDockerAutodetectMount "$VIASH_PAR_MERGED_GENE_TPM") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_MERGED_GENE_TPM" ) +fi +if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS")" ) + VIASH_PAR_MERGED_TRANSCRIPT_COUNTS=$(ViashDockerAutodetectMount "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ) +fi +if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_MERGED_TRANSCRIPT_TPM")" ) + VIASH_PAR_MERGED_TRANSCRIPT_TPM=$(ViashDockerAutodetectMount "$VIASH_PAR_MERGED_TRANSCRIPT_TPM") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rsem_merge_counts-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_GENE_COUNTS+x} ]; then echo "${VIASH_PAR_MERGED_GENE_COUNTS}" | sed "s#'#'\"'\"'#g;s#.*#par_merged_gene_counts='&'#" ; else echo "# par_merged_gene_counts="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_GENE_TPM+x} ]; then echo "${VIASH_PAR_MERGED_GENE_TPM}" | sed "s#'#'\"'\"'#g;s#.*#par_merged_gene_tpm='&'#" ; else echo "# par_merged_gene_tpm="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_TRANSCRIPT_COUNTS+x} ]; then echo "${VIASH_PAR_MERGED_TRANSCRIPT_COUNTS}" | sed "s#'#'\"'\"'#g;s#.*#par_merged_transcript_counts='&'#" ; else echo "# par_merged_transcript_counts="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_TRANSCRIPT_TPM+x} ]; then echo "${VIASH_PAR_MERGED_TRANSCRIPT_TPM}" | sed "s#'#'\"'\"'#g;s#.*#par_merged_transcript_tpm='&'#" ; else echo "# par_merged_transcript_tpm="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -ep pipefail + +mkdir -p tmp/genes +# cut -f 1,2 \`ls \$par_count_genes/*\` | head -n 1\` > gene_ids.txt +for file_id in \${par_count_genes[*]}; do + samplename=\`basename \$file_id | sed s/\\\\.genes.results\\\$//g\` + echo \$samplename > tmp/genes/\${samplename}.counts.txt + cut -f 5 \${file_id} | tail -n+2 >> tmp/genes/\${samplename}.counts.txt + echo \$samplename > tmp/genes/\${samplename}.tpm.txt + cut -f 6 \${file_id} | tail -n+2 >> tmp/genes/\${samplename}.tpm.txt +done + +mkdir -p tmp/isoforms +# cut -f 1,2 \`ls \$par_counts_transcripts/*\` | head -n 1\` > transcript_ids.txt +for file_id in \${par_counts_transcripts[*]}; do + samplename=\`basename \$file_id | sed s/\\\\.isoforms.results\\\$//g\` + echo \$samplename > tmp/isoforms/\${samplename}.counts.txt + cut -f 5 \${file_id} | tail -n+2 >> tmp/isoforms/\${samplename}.counts.txt + echo \$samplename > tmp/isoforms/\${samplename}.tpm.txt + cut -f 6 \${file_id} | tail -n+2 >> tmp/isoforms/\${samplename}.tpm.txt +done + +paste gene_ids.txt tmp/genes/*.counts.txt > \$par_merged_gene_counts +paste gene_ids.txt tmp/genes/*.tpm.txt > \$par_merged_gene_tpm +paste transcript_ids.txt tmp/isoforms/*.counts.txt > \$par_merged_transcript_counts +paste transcript_ids.txt tmp/isoforms/*.tpm.txt > \$par_merged_transcript_tpm +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_COUNTS_GENE" ]; then + VIASH_PAR_COUNTS_GENE=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE") + fi + if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ]; then + VIASH_PAR_COUNTS_TRANSCRIPTS=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_TRANSCRIPTS") + fi + if [ ! -z "$VIASH_PAR_MERGED_GENE_COUNTS" ]; then + VIASH_PAR_MERGED_GENE_COUNTS=$(ViashDockerStripAutomount "$VIASH_PAR_MERGED_GENE_COUNTS") + fi + if [ ! -z "$VIASH_PAR_MERGED_GENE_TPM" ]; then + VIASH_PAR_MERGED_GENE_TPM=$(ViashDockerStripAutomount "$VIASH_PAR_MERGED_GENE_TPM") + fi + if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ]; then + VIASH_PAR_MERGED_TRANSCRIPT_COUNTS=$(ViashDockerStripAutomount "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS") + fi + if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ]; then + VIASH_PAR_MERGED_TRANSCRIPT_TPM=$(ViashDockerStripAutomount "$VIASH_PAR_MERGED_TRANSCRIPT_TPM") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_MERGED_GENE_COUNTS" ] && [ ! -e "$VIASH_PAR_MERGED_GENE_COUNTS" ]; then + ViashError "Output file '$VIASH_PAR_MERGED_GENE_COUNTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MERGED_GENE_TPM" ] && [ ! -e "$VIASH_PAR_MERGED_GENE_TPM" ]; then + ViashError "Output file '$VIASH_PAR_MERGED_GENE_TPM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ] && [ ! -e "$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS" ]; then + ViashError "Output file '$VIASH_PAR_MERGED_TRANSCRIPT_COUNTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ] && [ ! -e "$VIASH_PAR_MERGED_TRANSCRIPT_TPM" ]; then + ViashError "Output file '$VIASH_PAR_MERGED_TRANSCRIPT_TPM' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml b/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml new file mode 100644 index 0000000..d3d223f --- /dev/null +++ b/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml @@ -0,0 +1,300 @@ +name: "rseqc_junctionannotation" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default=30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_intron" + description: "Minimum intron length (bp), default = 50." + info: null + default: + - 50 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_log" + description: "output log of junction annotation script" + info: null + default: + - "$id.junction_annotation.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plot_r" + description: "r script to generate splice_junction and splice_events plot" + info: null + default: + - "$id.junction_annotation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_junction_bed" + description: "junction annotation file (bed format)" + info: null + default: + - "$id.junction_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_junction_interact" + description: "interact file (bed format) of junctions. Can be uploaded to UCSC\ + \ genome browser or converted to bigInteract (using bedToBigBed program) for\ + \ visualization." + info: null + default: + - "$id.junction_annotation.Interact.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_junction_sheet" + description: "junction annotation file (xls format)" + info: null + default: + - "$id.junction_annotation.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_splice_events_plot" + description: "plot of splice events (pdf)" + info: null + default: + - "$id.splice_events.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_splice_junctions_plot" + description: "plot of junctions (pdf)" + info: null + default: + - "$id.splice_junctions_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compare detected splice junctions to reference gene model.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.bed12" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/junctionannotation/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + - "r-base" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_junctionannotation/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/rseqc/rseqc_junctionannotation" + executable: "target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/rseqc/rseqc_junctionannotation/nextflow_labels.config b/target/executable/rseqc/rseqc_junctionannotation/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/rseqc/rseqc_junctionannotation/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation b/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation new file mode 100755 index 0000000..a237100 --- /dev/null +++ b/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation @@ -0,0 +1,1430 @@ +#!/usr/bin/env bash + +# rseqc_junctionannotation v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="rseqc_junctionannotation" +VIASH_META_FUNCTIONALITY_NAME="rseqc_junctionannotation" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip r-base && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "RSeQC" + +LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_junctionannotation" +LABEL org.opencontainers.image.created="2025-05-07T12:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_junctionannotation v0.3.0" + echo "" + echo "Compare detected splice junctions to reference gene model." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo " --map_qual" + echo " type: integer" + echo " default: 30" + echo " min: 0" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default=30." + echo "" + echo " --min_intron" + echo " type: integer" + echo " default: 50" + echo " min: 1" + echo " Minimum intron length (bp), default = 50." + echo "" + echo "Output:" + echo " --output_log" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.log" + echo " output log of junction annotation script" + echo "" + echo " --output_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation_plot.r" + echo " r script to generate splice_junction and splice_events plot" + echo "" + echo " --output_junction_bed" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.bed" + echo " junction annotation file (bed format)" + echo "" + echo " --output_junction_interact" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.Interact.bed" + echo " interact file (bed format) of junctions. Can be uploaded to UCSC genome" + echo " browser or converted to bigInteract (using bedToBigBed program) for" + echo " visualization." + echo "" + echo " --output_junction_sheet" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.xls" + echo " junction annotation file (xls format)" + echo "" + echo " --output_splice_events_plot" + echo " type: file, output, file must exist" + echo " default: \$id.splice_events.pdf" + echo " plot of splice events (pdf)" + echo "" + echo " --output_splice_junctions_plot" + echo " type: file, output, file must exist" + echo " default: \$id.splice_junctions_plot.pdf" + echo " plot of junctions (pdf)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "rseqc_junctionannotation v0.3.0" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --refgene) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --refgene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --refgene=*) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene=*\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --map_qual) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --map_qual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --map_qual=*) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual=*\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_intron) + [ -n "$VIASH_PAR_MIN_INTRON" ] && ViashError Bad arguments for option \'--min_intron\': \'$VIASH_PAR_MIN_INTRON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_INTRON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_intron. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_intron=*) + [ -n "$VIASH_PAR_MIN_INTRON" ] && ViashError Bad arguments for option \'--min_intron=*\': \'$VIASH_PAR_MIN_INTRON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_INTRON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_log) + [ -n "$VIASH_PAR_OUTPUT_LOG" ] && ViashError Bad arguments for option \'--output_log\': \'$VIASH_PAR_OUTPUT_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LOG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_log. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_log=*) + [ -n "$VIASH_PAR_OUTPUT_LOG" ] && ViashError Bad arguments for option \'--output_log=*\': \'$VIASH_PAR_OUTPUT_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LOG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_plot_r) + [ -n "$VIASH_PAR_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--output_plot_r\': \'$VIASH_PAR_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_plot_r=*) + [ -n "$VIASH_PAR_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--output_plot_r=*\': \'$VIASH_PAR_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_junction_bed) + [ -n "$VIASH_PAR_OUTPUT_JUNCTION_BED" ] && ViashError Bad arguments for option \'--output_junction_bed\': \'$VIASH_PAR_OUTPUT_JUNCTION_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_JUNCTION_BED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_junction_bed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_junction_bed=*) + [ -n "$VIASH_PAR_OUTPUT_JUNCTION_BED" ] && ViashError Bad arguments for option \'--output_junction_bed=*\': \'$VIASH_PAR_OUTPUT_JUNCTION_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_JUNCTION_BED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_junction_interact) + [ -n "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ] && ViashError Bad arguments for option \'--output_junction_interact\': \'$VIASH_PAR_OUTPUT_JUNCTION_INTERACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_JUNCTION_INTERACT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_junction_interact. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_junction_interact=*) + [ -n "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ] && ViashError Bad arguments for option \'--output_junction_interact=*\': \'$VIASH_PAR_OUTPUT_JUNCTION_INTERACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_JUNCTION_INTERACT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_junction_sheet) + [ -n "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ] && ViashError Bad arguments for option \'--output_junction_sheet\': \'$VIASH_PAR_OUTPUT_JUNCTION_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_JUNCTION_SHEET="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_junction_sheet. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_junction_sheet=*) + [ -n "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ] && ViashError Bad arguments for option \'--output_junction_sheet=*\': \'$VIASH_PAR_OUTPUT_JUNCTION_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_JUNCTION_SHEET=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_splice_events_plot) + [ -n "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ] && ViashError Bad arguments for option \'--output_splice_events_plot\': \'$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_splice_events_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_splice_events_plot=*) + [ -n "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ] && ViashError Bad arguments for option \'--output_splice_events_plot=*\': \'$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_splice_junctions_plot) + [ -n "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && ViashError Bad arguments for option \'--output_splice_junctions_plot\': \'$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_splice_junctions_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_splice_junctions_plot=*) + [ -n "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && ViashError Bad arguments for option \'--output_splice_junctions_plot=*\': \'$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_junctionannotation:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFGENE+x} ]; then + ViashError '--refgene' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MAP_QUAL+x} ]; then + VIASH_PAR_MAP_QUAL="30" +fi +if [ -z ${VIASH_PAR_MIN_INTRON+x} ]; then + VIASH_PAR_MIN_INTRON="50" +fi +if [ -z ${VIASH_PAR_OUTPUT_LOG+x} ]; then + VIASH_PAR_OUTPUT_LOG="\$id.junction_annotation.log" +fi +if [ -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_OUTPUT_PLOT_R="\$id.junction_annotation_plot.r" +fi +if [ -z ${VIASH_PAR_OUTPUT_JUNCTION_BED+x} ]; then + VIASH_PAR_OUTPUT_JUNCTION_BED="\$id.junction_annotation.bed" +fi +if [ -z ${VIASH_PAR_OUTPUT_JUNCTION_INTERACT+x} ]; then + VIASH_PAR_OUTPUT_JUNCTION_INTERACT="\$id.junction_annotation.Interact.bed" +fi +if [ -z ${VIASH_PAR_OUTPUT_JUNCTION_SHEET+x} ]; then + VIASH_PAR_OUTPUT_JUNCTION_SHEET="\$id.junction_annotation.xls" +fi +if [ -z ${VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT+x} ]; then + VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT="\$id.splice_events.pdf" +fi +if [ -z ${VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT+x} ]; then + VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT="\$id.splice_junctions_plot.pdf" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFGENE" ] && [ ! -e "$VIASH_PAR_REFGENE" ]; then + ViashError "Input file '$VIASH_PAR_REFGENE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MAP_QUAL" ]]; then + if ! [[ "$VIASH_PAR_MAP_QUAL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--map_qual' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MAP_QUAL -lt 0 ]]; then + ViashError '--map_qual' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_INTRON" ]]; then + if ! [[ "$VIASH_PAR_MIN_INTRON" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_intron' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MIN_INTRON -lt 1 ]]; then + ViashError '--min_intron' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_LOG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_LOG")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_BED" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_JUNCTION_BED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_JUNCTION_BED")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_JUNCTION_SHEET")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_JUNCTION_SHEET")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REFGENE")" ) + VIASH_PAR_REFGENE=$(ViashDockerAutodetectMount "$VIASH_PAR_REFGENE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_LOG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_LOG")" ) + VIASH_PAR_OUTPUT_LOG=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_LOG") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_LOG" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_PLOT_R")" ) + VIASH_PAR_OUTPUT_PLOT_R=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_PLOT_R") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_PLOT_R" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_BED" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_JUNCTION_BED")" ) + VIASH_PAR_OUTPUT_JUNCTION_BED=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_JUNCTION_BED") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_JUNCTION_BED" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT")" ) + VIASH_PAR_OUTPUT_JUNCTION_INTERACT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_JUNCTION_SHEET")" ) + VIASH_PAR_OUTPUT_JUNCTION_SHEET=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_JUNCTION_SHEET") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT")" ) + VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT")" ) + VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_junctionannotation-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\"'\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\"'\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_INTRON+x} ]; then echo "${VIASH_PAR_MIN_INTRON}" | sed "s#'#'\"'\"'#g;s#.*#par_min_intron='&'#" ; else echo "# par_min_intron="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_LOG+x} ]; then echo "${VIASH_PAR_OUTPUT_LOG}" | sed "s#'#'\"'\"'#g;s#.*#par_output_log='&'#" ; else echo "# par_output_log="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT_R}" | sed "s#'#'\"'\"'#g;s#.*#par_output_plot_r='&'#" ; else echo "# par_output_plot_r="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_JUNCTION_BED+x} ]; then echo "${VIASH_PAR_OUTPUT_JUNCTION_BED}" | sed "s#'#'\"'\"'#g;s#.*#par_output_junction_bed='&'#" ; else echo "# par_output_junction_bed="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_JUNCTION_INTERACT+x} ]; then echo "${VIASH_PAR_OUTPUT_JUNCTION_INTERACT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_junction_interact='&'#" ; else echo "# par_output_junction_interact="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_JUNCTION_SHEET+x} ]; then echo "${VIASH_PAR_OUTPUT_JUNCTION_SHEET}" | sed "s#'#'\"'\"'#g;s#.*#par_output_junction_sheet='&'#" ; else echo "# par_output_junction_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_splice_events_plot='&'#" ; else echo "# par_output_splice_events_plot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_splice_junctions_plot='&'#" ; else echo "# par_output_splice_junctions_plot="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix=\$(openssl rand -hex 8) +input="testData/unit_test_resources/test.paired_end.sorted.bam" +refgene="testData/unit_test_resources/test.bed" +junction_annotation.py \\ + -i \$par_input \\ + -r \$par_refgene \\ + -o \$prefix \\ + -m \$par_min_intron \\ + -q \$par_map_qual > \$par_output_log + +[[ -f "\$prefix.junction.bed" ]] && mv \$prefix.junction.bed \$par_output_junction_bed +[[ -f "\$prefix.junction.Interact.bed" ]] && mv \$prefix.junction.Interact.bed \$par_output_junction_interact +[[ -f "\$prefix.junction.xls" ]] && mv \$prefix.junction.xls \$par_output_junction_sheet +[[ -f "\$prefix.junction_plot.r" ]] && mv \$prefix.junction_plot.r \$par_output_plot_r +[[ -f "\$prefix.splice_events.pdf" ]] && mv \$prefix.splice_events.pdf \$par_output_splice_events_plot +[[ -f "\$prefix.splice_junction.pdf" ]] && mv \$prefix.splice_junction.pdf \$par_output_splice_junctions_plot +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_PAR_REFGENE=$(ViashDockerStripAutomount "$VIASH_PAR_REFGENE") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_LOG" ]; then + VIASH_PAR_OUTPUT_LOG=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_LOG") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ]; then + VIASH_PAR_OUTPUT_PLOT_R=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_PLOT_R") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_BED" ]; then + VIASH_PAR_OUTPUT_JUNCTION_BED=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_JUNCTION_BED") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ]; then + VIASH_PAR_OUTPUT_JUNCTION_INTERACT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ]; then + VIASH_PAR_OUTPUT_JUNCTION_SHEET=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_JUNCTION_SHEET") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ]; then + VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ]; then + VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_LOG" ] && [ ! -e "$VIASH_PAR_OUTPUT_LOG" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_LOG' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ] && [ ! -e "$VIASH_PAR_OUTPUT_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_BED" ] && [ ! -e "$VIASH_PAR_OUTPUT_JUNCTION_BED" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_JUNCTION_BED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ] && [ ! -e "$VIASH_PAR_OUTPUT_JUNCTION_INTERACT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_JUNCTION_INTERACT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ] && [ ! -e "$VIASH_PAR_OUTPUT_JUNCTION_SHEET" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_JUNCTION_SHEET' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ] && [ ! -e "$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && [ ! -e "$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml b/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml new file mode 100644 index 0000000..81075af --- /dev/null +++ b/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml @@ -0,0 +1,289 @@ +name: "rseqc_junctionsaturation" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_lower_bound" + description: "Sampling starts from this percentile, must be an integer between\ + \ 0 and 100, default =5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_upper_bound" + description: "Sampling ends at this percentile, must be an integer between 0 and\ + \ 100, default =5." + info: null + default: + - 100 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_step" + description: "Sampling frequency in %. Smaller value means more sampling times.\ + \ Must be an integer between 0 and 100, default = 5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_intron" + description: "Minimum intron length (bp), default = 50." + info: null + default: + - 50 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_splice_read" + description: "Minimum number of supporting reads to call a junction, default =\ + \ 1." + info: null + default: + - 1 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default=30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_plot_r" + description: "r script to generate junction_saturation_plot plot" + info: null + default: + - "$id.junction_saturation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plot" + description: "plot of junction saturation (pdf)" + info: null + default: + - "$id.junction_saturation_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compare detected splice junctions to reference gene model.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.bed" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/junctionsaturation/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + - "r-base" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_junctionsaturation/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/rseqc/rseqc_junctionsaturation" + executable: "target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/rseqc/rseqc_junctionsaturation/nextflow_labels.config b/target/executable/rseqc/rseqc_junctionsaturation/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/rseqc/rseqc_junctionsaturation/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation b/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation new file mode 100755 index 0000000..ac11405 --- /dev/null +++ b/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation @@ -0,0 +1,1394 @@ +#!/usr/bin/env bash + +# rseqc_junctionsaturation v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="rseqc_junctionsaturation" +VIASH_META_FUNCTIONALITY_NAME="rseqc_junctionsaturation" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip r-base && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "RSeQC" + +LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_junctionsaturation" +LABEL org.opencontainers.image.created="2025-05-07T12:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_junctionsaturation v0.3.0" + echo "" + echo "Compare detected splice junctions to reference gene model." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo " --sampling_percentile_lower_bound" + echo " type: integer" + echo " default: 5" + echo " min: 0" + echo " max: 100" + echo " Sampling starts from this percentile, must be an integer between 0 and" + echo " 100, default =5." + echo "" + echo " --sampling_percentile_upper_bound" + echo " type: integer" + echo " default: 100" + echo " min: 0" + echo " max: 100" + echo " Sampling ends at this percentile, must be an integer between 0 and 100," + echo " default =5." + echo "" + echo " --sampling_percentile_step" + echo " type: integer" + echo " default: 5" + echo " min: 0" + echo " max: 100" + echo " Sampling frequency in %. Smaller value means more sampling times. Must" + echo " be an integer between 0 and 100, default = 5." + echo "" + echo " --min_intron" + echo " type: integer" + echo " default: 50" + echo " min: 1" + echo " Minimum intron length (bp), default = 50." + echo "" + echo " --min_splice_read" + echo " type: integer" + echo " default: 1" + echo " min: 1" + echo " Minimum number of supporting reads to call a junction, default = 1." + echo "" + echo " --map_qual" + echo " type: integer" + echo " default: 30" + echo " min: 0" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default=30." + echo "" + echo "Output:" + echo " --output_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.junction_saturation_plot.r" + echo " r script to generate junction_saturation_plot plot" + echo "" + echo " --output_plot" + echo " type: file, output, file must exist" + echo " default: \$id.junction_saturation_plot.pdf" + echo " plot of junction saturation (pdf)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "rseqc_junctionsaturation v0.3.0" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --refgene) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --refgene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --refgene=*) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene=*\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_percentile_lower_bound) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_lower_bound\': \'$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_percentile_lower_bound. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_percentile_lower_bound=*) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_lower_bound=*\': \'$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_percentile_upper_bound) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_upper_bound\': \'$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_percentile_upper_bound. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_percentile_upper_bound=*) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_upper_bound=*\': \'$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_percentile_step) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" ] && ViashError Bad arguments for option \'--sampling_percentile_step\': \'$VIASH_PAR_SAMPLING_PERCENTILE_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_STEP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_percentile_step. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_percentile_step=*) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" ] && ViashError Bad arguments for option \'--sampling_percentile_step=*\': \'$VIASH_PAR_SAMPLING_PERCENTILE_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_STEP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_intron) + [ -n "$VIASH_PAR_MIN_INTRON" ] && ViashError Bad arguments for option \'--min_intron\': \'$VIASH_PAR_MIN_INTRON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_INTRON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_intron. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_intron=*) + [ -n "$VIASH_PAR_MIN_INTRON" ] && ViashError Bad arguments for option \'--min_intron=*\': \'$VIASH_PAR_MIN_INTRON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_INTRON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_splice_read) + [ -n "$VIASH_PAR_MIN_SPLICE_READ" ] && ViashError Bad arguments for option \'--min_splice_read\': \'$VIASH_PAR_MIN_SPLICE_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLICE_READ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_splice_read. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_splice_read=*) + [ -n "$VIASH_PAR_MIN_SPLICE_READ" ] && ViashError Bad arguments for option \'--min_splice_read=*\': \'$VIASH_PAR_MIN_SPLICE_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLICE_READ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --map_qual) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --map_qual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --map_qual=*) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual=*\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_plot_r) + [ -n "$VIASH_PAR_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--output_plot_r\': \'$VIASH_PAR_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_plot_r=*) + [ -n "$VIASH_PAR_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--output_plot_r=*\': \'$VIASH_PAR_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_plot) + [ -n "$VIASH_PAR_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--output_plot\': \'$VIASH_PAR_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_plot=*) + [ -n "$VIASH_PAR_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--output_plot=*\': \'$VIASH_PAR_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_junctionsaturation:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFGENE+x} ]; then + ViashError '--refgene' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND+x} ]; then + VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND="5" +fi +if [ -z ${VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND+x} ]; then + VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND="100" +fi +if [ -z ${VIASH_PAR_SAMPLING_PERCENTILE_STEP+x} ]; then + VIASH_PAR_SAMPLING_PERCENTILE_STEP="5" +fi +if [ -z ${VIASH_PAR_MIN_INTRON+x} ]; then + VIASH_PAR_MIN_INTRON="50" +fi +if [ -z ${VIASH_PAR_MIN_SPLICE_READ+x} ]; then + VIASH_PAR_MIN_SPLICE_READ="1" +fi +if [ -z ${VIASH_PAR_MAP_QUAL+x} ]; then + VIASH_PAR_MAP_QUAL="30" +fi +if [ -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_OUTPUT_PLOT_R="\$id.junction_saturation_plot.r" +fi +if [ -z ${VIASH_PAR_OUTPUT_PLOT+x} ]; then + VIASH_PAR_OUTPUT_PLOT="\$id.junction_saturation_plot.pdf" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFGENE" ] && [ ! -e "$VIASH_PAR_REFGENE" ]; then + ViashError "Input file '$VIASH_PAR_REFGENE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" ]]; then + if ! [[ "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sampling_percentile_lower_bound' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND -lt 0 ]]; then + ViashError '--sampling_percentile_lower_bound' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND -gt 100 ]]; then + ViashError '--sampling_percentile_lower_bound' has be less than or equal to 100. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" ]]; then + if ! [[ "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sampling_percentile_upper_bound' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND -lt 0 ]]; then + ViashError '--sampling_percentile_upper_bound' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND -gt 100 ]]; then + ViashError '--sampling_percentile_upper_bound' has be less than or equal to 100. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" ]]; then + if ! [[ "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sampling_percentile_step' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_STEP -lt 0 ]]; then + ViashError '--sampling_percentile_step' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_STEP -gt 100 ]]; then + ViashError '--sampling_percentile_step' has be less than or equal to 100. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_INTRON" ]]; then + if ! [[ "$VIASH_PAR_MIN_INTRON" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_intron' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MIN_INTRON -lt 1 ]]; then + ViashError '--min_intron' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SPLICE_READ" ]]; then + if ! [[ "$VIASH_PAR_MIN_SPLICE_READ" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_splice_read' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MIN_SPLICE_READ -lt 1 ]]; then + ViashError '--min_splice_read' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAP_QUAL" ]]; then + if ! [[ "$VIASH_PAR_MAP_QUAL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--map_qual' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MAP_QUAL -lt 0 ]]; then + ViashError '--map_qual' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_PLOT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REFGENE")" ) + VIASH_PAR_REFGENE=$(ViashDockerAutodetectMount "$VIASH_PAR_REFGENE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_PLOT_R")" ) + VIASH_PAR_OUTPUT_PLOT_R=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_PLOT_R") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_PLOT_R" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_PLOT")" ) + VIASH_PAR_OUTPUT_PLOT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_PLOT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_PLOT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_junctionsaturation-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\"'\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND+x} ]; then echo "${VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND}" | sed "s#'#'\"'\"'#g;s#.*#par_sampling_percentile_lower_bound='&'#" ; else echo "# par_sampling_percentile_lower_bound="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND+x} ]; then echo "${VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND}" | sed "s#'#'\"'\"'#g;s#.*#par_sampling_percentile_upper_bound='&'#" ; else echo "# par_sampling_percentile_upper_bound="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_PERCENTILE_STEP+x} ]; then echo "${VIASH_PAR_SAMPLING_PERCENTILE_STEP}" | sed "s#'#'\"'\"'#g;s#.*#par_sampling_percentile_step='&'#" ; else echo "# par_sampling_percentile_step="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_INTRON+x} ]; then echo "${VIASH_PAR_MIN_INTRON}" | sed "s#'#'\"'\"'#g;s#.*#par_min_intron='&'#" ; else echo "# par_min_intron="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_SPLICE_READ+x} ]; then echo "${VIASH_PAR_MIN_SPLICE_READ}" | sed "s#'#'\"'\"'#g;s#.*#par_min_splice_read='&'#" ; else echo "# par_min_splice_read="; fi ) +$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\"'\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT_R}" | sed "s#'#'\"'\"'#g;s#.*#par_output_plot_r='&'#" ; else echo "# par_output_plot_r="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_plot='&'#" ; else echo "# par_output_plot="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix=\$(openssl rand -hex 8) + +junction_saturation.py \\ + -i \$par_input \\ + -r \$par_refgene \\ + -o \$prefix \\ + -l \$par_sampling_percentile_lower_bound \\ + -u \$par_sampling_percentile_upper_bound \\ + -s \$par_sampling_percentile_step \\ + -m \$par_min_intron \\ + -v \$par_min_splice_read \\ + -q \$par_map_qual + +[[ -f "\$prefix.junctionSaturation_plot.pdf" ]] && mv \$prefix.junctionSaturation_plot.pdf \$par_output_plot +[[ -f "\$prefix.junctionSaturation_plot.r" ]] && mv \$prefix.junctionSaturation_plot.r \$par_output_plot_r +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_PAR_REFGENE=$(ViashDockerStripAutomount "$VIASH_PAR_REFGENE") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ]; then + VIASH_PAR_OUTPUT_PLOT_R=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_PLOT_R") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_PLOT" ]; then + VIASH_PAR_OUTPUT_PLOT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_PLOT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ] && [ ! -e "$VIASH_PAR_OUTPUT_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_PLOT" ] && [ ! -e "$VIASH_PAR_OUTPUT_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_PLOT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml b/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml new file mode 100644 index 0000000..ec82c69 --- /dev/null +++ b/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml @@ -0,0 +1,202 @@ +name: "rseqc_readdistribution" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "output file (txt) of read distribution analysis." + info: null + default: + - "$id.read_distribution.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculate how mapped reads are distributed over genomic features.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.bed12" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/readdistribution/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_readdistribution/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/rseqc/rseqc_readdistribution" + executable: "target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/rseqc/rseqc_readdistribution/nextflow_labels.config b/target/executable/rseqc/rseqc_readdistribution/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/rseqc/rseqc_readdistribution/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution b/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution new file mode 100755 index 0000000..6fada15 --- /dev/null +++ b/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution @@ -0,0 +1,1143 @@ +#!/usr/bin/env bash + +# rseqc_readdistribution v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="rseqc_readdistribution" +VIASH_META_FUNCTIONALITY_NAME="rseqc_readdistribution" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "RSeQC" + +LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_readdistribution" +LABEL org.opencontainers.image.created="2025-05-07T12:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_readdistribution v0.3.0" + echo "" + echo "Calculate how mapped reads are distributed over genomic features." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo "Output:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: \$id.read_distribution.txt" + echo " output file (txt) of read distribution analysis." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "rseqc_readdistribution v0.3.0" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --refgene) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --refgene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --refgene=*) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene=*\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_readdistribution:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFGENE+x} ]; then + ViashError '--refgene' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="\$id.read_distribution.txt" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFGENE" ] && [ ! -e "$VIASH_PAR_REFGENE" ]; then + ViashError "Input file '$VIASH_PAR_REFGENE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REFGENE")" ) + VIASH_PAR_REFGENE=$(ViashDockerAutodetectMount "$VIASH_PAR_REFGENE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_readdistribution-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\"'\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +read_distribution.py \\ + -i \$par_input \\ + -r \$par_refgene \\ +> \$par_output +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_PAR_REFGENE=$(ViashDockerStripAutomount "$VIASH_PAR_REFGENE") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml b/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml new file mode 100644 index 0000000..af14e48 --- /dev/null +++ b/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml @@ -0,0 +1,251 @@ +name: "rseqc_readduplication" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_count_upper_limit" + description: "Upper limit of reads' occurence. Only used for plotting, default\ + \ = 500 (times)." + info: null + default: + - 500 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default=30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_duplication_rate_plot_r" + description: "R script for generating duplication rate plot" + info: null + default: + - "$id.duplication_rate_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duplication_rate_plot" + description: "duplication rate plot (pdf)" + info: null + default: + - "$id.duplication_rate_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duplication_rate_mapping" + description: "Summary of mapping-based read duplication" + info: null + default: + - "$id.duplication_rate_mapping.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duplication_rate_sequence" + description: "Summary of sequencing-based read duplication" + info: null + default: + - "$id.duplication_rate_sequencing.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculate read duplication rate.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/readduplication/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + - "r-base" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_readduplication/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/rseqc/rseqc_readduplication" + executable: "target/executable/rseqc/rseqc_readduplication/rseqc_readduplication" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/rseqc/rseqc_readduplication/nextflow_labels.config b/target/executable/rseqc/rseqc_readduplication/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/rseqc/rseqc_readduplication/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication b/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication new file mode 100755 index 0000000..80f56a8 --- /dev/null +++ b/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication @@ -0,0 +1,1289 @@ +#!/usr/bin/env bash + +# rseqc_readduplication v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="rseqc_readduplication" +VIASH_META_FUNCTIONALITY_NAME="rseqc_readduplication" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip r-base && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "RSeQC" + +LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_readduplication" +LABEL org.opencontainers.image.created="2025-05-07T12:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_readduplication v0.3.0" + echo "" + echo "Calculate read duplication rate." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --read_count_upper_limit" + echo " type: integer" + echo " default: 500" + echo " min: 1" + echo " Upper limit of reads' occurence. Only used for plotting, default = 500" + echo " (times)." + echo "" + echo " --map_qual" + echo " type: integer" + echo " default: 30" + echo " min: 0" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default=30." + echo "" + echo "Output:" + echo " --output_duplication_rate_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_plot.r" + echo " R script for generating duplication rate plot" + echo "" + echo " --output_duplication_rate_plot" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_plot.pdf" + echo " duplication rate plot (pdf)" + echo "" + echo " --output_duplication_rate_mapping" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_mapping.xls" + echo " Summary of mapping-based read duplication" + echo "" + echo " --output_duplication_rate_sequence" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_sequencing.xls" + echo " Summary of sequencing-based read duplication" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "rseqc_readduplication v0.3.0" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_count_upper_limit) + [ -n "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" ] && ViashError Bad arguments for option \'--read_count_upper_limit\': \'$VIASH_PAR_READ_COUNT_UPPER_LIMIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_COUNT_UPPER_LIMIT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_count_upper_limit. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_count_upper_limit=*) + [ -n "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" ] && ViashError Bad arguments for option \'--read_count_upper_limit=*\': \'$VIASH_PAR_READ_COUNT_UPPER_LIMIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_COUNT_UPPER_LIMIT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --map_qual) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --map_qual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --map_qual=*) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual=*\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_duplication_rate_plot_r) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && ViashError Bad arguments for option \'--output_duplication_rate_plot_r\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_duplication_rate_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_duplication_rate_plot_r=*) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && ViashError Bad arguments for option \'--output_duplication_rate_plot_r=*\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_duplication_rate_plot) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ] && ViashError Bad arguments for option \'--output_duplication_rate_plot\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_duplication_rate_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_duplication_rate_plot=*) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ] && ViashError Bad arguments for option \'--output_duplication_rate_plot=*\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_duplication_rate_mapping) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ] && ViashError Bad arguments for option \'--output_duplication_rate_mapping\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_duplication_rate_mapping. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_duplication_rate_mapping=*) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ] && ViashError Bad arguments for option \'--output_duplication_rate_mapping=*\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_duplication_rate_sequence) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && ViashError Bad arguments for option \'--output_duplication_rate_sequence\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_duplication_rate_sequence. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_duplication_rate_sequence=*) + [ -n "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && ViashError Bad arguments for option \'--output_duplication_rate_sequence=*\': \'$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_readduplication:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_READ_COUNT_UPPER_LIMIT+x} ]; then + VIASH_PAR_READ_COUNT_UPPER_LIMIT="500" +fi +if [ -z ${VIASH_PAR_MAP_QUAL+x} ]; then + VIASH_PAR_MAP_QUAL="30" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R+x} ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R="\$id.duplication_rate_plot.r" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT+x} ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT="\$id.duplication_rate_plot.pdf" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING+x} ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING="\$id.duplication_rate_mapping.xls" +fi +if [ -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE+x} ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE="\$id.duplication_rate_sequencing.xls" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" ]]; then + if ! [[ "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--read_count_upper_limit' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_READ_COUNT_UPPER_LIMIT -lt 1 ]]; then + ViashError '--read_count_upper_limit' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAP_QUAL" ]]; then + if ! [[ "$VIASH_PAR_MAP_QUAL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--map_qual' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MAP_QUAL -lt 0 ]]; then + ViashError '--map_qual' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R")" ) + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT")" ) + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING")" ) + VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE")" ) + VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_readduplication-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_COUNT_UPPER_LIMIT+x} ]; then echo "${VIASH_PAR_READ_COUNT_UPPER_LIMIT}" | sed "s#'#'\"'\"'#g;s#.*#par_read_count_upper_limit='&'#" ; else echo "# par_read_count_upper_limit="; fi ) +$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\"'\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R}" | sed "s#'#'\"'\"'#g;s#.*#par_output_duplication_rate_plot_r='&'#" ; else echo "# par_output_duplication_rate_plot_r="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_duplication_rate_plot='&'#" ; else echo "# par_output_duplication_rate_plot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING}" | sed "s#'#'\"'\"'#g;s#.*#par_output_duplication_rate_mapping='&'#" ; else echo "# par_output_duplication_rate_mapping="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE}" | sed "s#'#'\"'\"'#g;s#.*#par_output_duplication_rate_sequence='&'#" ; else echo "# par_output_duplication_rate_sequence="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix=\$(openssl rand -hex 8) + +read_duplication.py \\ + -i \$par_input \\ + -o \$prefix \\ + -u \$par_read_count_upper_limit \\ + -q \$par_map_qual + +[[ -f "\$prefix.DupRate_plot.pdf" ]] && mv \$prefix.DupRate_plot.pdf \$par_output_duplication_rate_plot +[[ -f "\$prefix.DupRate_plot.r" ]] && mv \$prefix.DupRate_plot.r \$par_output_duplication_rate_plot_r +[[ -f "\$prefix.pos.DupRate.xls" ]] && mv \$prefix.pos.DupRate.xls \$par_output_duplication_rate_mapping +[[ -f "\$prefix.seq.DupRate.xls" ]] && mv \$prefix.seq.DupRate.xls \$par_output_duplication_rate_sequence +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ]; then + VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && [ ! -e "$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/rseqc/rseqc_tin/.config.vsh.yaml b/target/executable/rseqc/rseqc_tin/.config.vsh.yaml new file mode 100644 index 0000000..6278a57 --- /dev/null +++ b/target/executable/rseqc/rseqc_tin/.config.vsh.yaml @@ -0,0 +1,254 @@ +name: "rseqc_tin" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--bam_input" + description: "Path to input alignment file in BAM or SAM format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai_input" + description: "Path to bam index file in bai format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "BED file containing the reference gene model" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--minimum_coverage" + description: "Minimum number of reads mapped to a transcript, default = 10." + info: null + default: + - 10 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sample_size" + description: "Number of equal-spaced nucleotide positions picked from mRNA. Note,\ + \ if this number is larger than the length of mRNA (L), it will be halved until\ + \ it's smaller than L (default = 100)" + info: null + default: + - 100 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--subtract_background" + description: "Set flag to subtract background noise (estimated from intronic reads).\ + \ Only use this option if there are substantial intronic reads." + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--output_tin_summary" + description: "summary statistics (txt) of calculated TIN metrics" + info: null + default: + - "$id.tin_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_tin" + description: "file with TIN metrics (xls)" + info: null + default: + - "$id.tin.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculte TIN (transcript integrity number) from RNA-seq reads\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.paired_end.sorted.bam.bai" +- type: "file" + path: "test.bed12" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/tin/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + interactive: false + - type: "docker" + run: + - "pip3 install RSeQC\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/tin/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/rseqc/rseqc_tin" + executable: "target/executable/rseqc/rseqc_tin/rseqc_tin" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/rseqc/rseqc_tin/nextflow_labels.config b/target/executable/rseqc/rseqc_tin/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/rseqc/rseqc_tin/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/rseqc/rseqc_tin/rseqc_tin b/target/executable/rseqc/rseqc_tin/rseqc_tin new file mode 100755 index 0000000..08d363a --- /dev/null +++ b/target/executable/rseqc/rseqc_tin/rseqc_tin @@ -0,0 +1,1313 @@ +#!/usr/bin/env bash + +# rseqc_tin v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="rseqc_tin" +VIASH_META_FUNCTIONALITY_NAME="rseqc_tin" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip && \ + rm -rf /var/lib/apt/lists/* + +RUN pip3 install RSeQC + +LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_tin" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_tin v0.3.0" + echo "" + echo "Calculte TIN (transcript integrity number) from RNA-seq reads" + echo "" + echo "Input:" + echo " --bam_input" + echo " type: file, required parameter, file must exist" + echo " Path to input alignment file in BAM or SAM format." + echo "" + echo " --bai_input" + echo " type: file, required parameter, file must exist" + echo " Path to bam index file in bai format." + echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " BED file containing the reference gene model" + echo "" + echo " --minimum_coverage" + echo " type: integer" + echo " default: 10" + echo " min: 1" + echo " Minimum number of reads mapped to a transcript, default = 10." + echo "" + echo " --sample_size" + echo " type: integer" + echo " default: 100" + echo " min: 1" + echo " Number of equal-spaced nucleotide positions picked from mRNA. Note, if" + echo " this number is larger than the length of mRNA (L), it will be halved" + echo " until it's smaller than L (default = 100)" + echo "" + echo " --subtract_background" + echo " type: boolean_true" + echo " Set flag to subtract background noise (estimated from intronic reads)." + echo " Only use this option if there are substantial intronic reads." + echo "" + echo "Output:" + echo " --output_tin_summary" + echo " type: file, output, file must exist" + echo " default: \$id.tin_summary.txt" + echo " summary statistics (txt) of calculated TIN metrics" + echo "" + echo " --output_tin" + echo " type: file, output, file must exist" + echo " default: \$id.tin.xls" + echo " file with TIN metrics (xls)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "rseqc_tin v0.3.0" + exit + ;; + --bam_input) + [ -n "$VIASH_PAR_BAM_INPUT" ] && ViashError Bad arguments for option \'--bam_input\': \'$VIASH_PAR_BAM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_input=*) + [ -n "$VIASH_PAR_BAM_INPUT" ] && ViashError Bad arguments for option \'--bam_input=*\': \'$VIASH_PAR_BAM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bai_input) + [ -n "$VIASH_PAR_BAI_INPUT" ] && ViashError Bad arguments for option \'--bai_input\': \'$VIASH_PAR_BAI_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAI_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bai_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bai_input=*) + [ -n "$VIASH_PAR_BAI_INPUT" ] && ViashError Bad arguments for option \'--bai_input=*\': \'$VIASH_PAR_BAI_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAI_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --refgene) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --refgene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --refgene=*) + [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene=*\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFGENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --minimum_coverage) + [ -n "$VIASH_PAR_MINIMUM_COVERAGE" ] && ViashError Bad arguments for option \'--minimum_coverage\': \'$VIASH_PAR_MINIMUM_COVERAGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_COVERAGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimum_coverage. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minimum_coverage=*) + [ -n "$VIASH_PAR_MINIMUM_COVERAGE" ] && ViashError Bad arguments for option \'--minimum_coverage=*\': \'$VIASH_PAR_MINIMUM_COVERAGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_COVERAGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sample_size) + [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_size=*) + [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size=*\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --subtract_background) + [ -n "$VIASH_PAR_SUBTRACT_BACKGROUND" ] && ViashError Bad arguments for option \'--subtract_background\': \'$VIASH_PAR_SUBTRACT_BACKGROUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBTRACT_BACKGROUND=true + shift 1 + ;; + --output_tin_summary) + [ -n "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ] && ViashError Bad arguments for option \'--output_tin_summary\': \'$VIASH_PAR_OUTPUT_TIN_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TIN_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_tin_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_tin_summary=*) + [ -n "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ] && ViashError Bad arguments for option \'--output_tin_summary=*\': \'$VIASH_PAR_OUTPUT_TIN_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TIN_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_tin) + [ -n "$VIASH_PAR_OUTPUT_TIN" ] && ViashError Bad arguments for option \'--output_tin\': \'$VIASH_PAR_OUTPUT_TIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_tin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_tin=*) + [ -n "$VIASH_PAR_OUTPUT_TIN" ] && ViashError Bad arguments for option \'--output_tin=*\': \'$VIASH_PAR_OUTPUT_TIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_tin:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_BAM_INPUT+x} ]; then + ViashError '--bam_input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_BAI_INPUT+x} ]; then + ViashError '--bai_input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFGENE+x} ]; then + ViashError '--refgene' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MINIMUM_COVERAGE+x} ]; then + VIASH_PAR_MINIMUM_COVERAGE="10" +fi +if [ -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then + VIASH_PAR_SAMPLE_SIZE="100" +fi +if [ -z ${VIASH_PAR_SUBTRACT_BACKGROUND+x} ]; then + VIASH_PAR_SUBTRACT_BACKGROUND="false" +fi +if [ -z ${VIASH_PAR_OUTPUT_TIN_SUMMARY+x} ]; then + VIASH_PAR_OUTPUT_TIN_SUMMARY="\$id.tin_summary.txt" +fi +if [ -z ${VIASH_PAR_OUTPUT_TIN+x} ]; then + VIASH_PAR_OUTPUT_TIN="\$id.tin.xls" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BAM_INPUT" ] && [ ! -e "$VIASH_PAR_BAM_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_BAM_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAI_INPUT" ] && [ ! -e "$VIASH_PAR_BAI_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_BAI_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFGENE" ] && [ ! -e "$VIASH_PAR_REFGENE" ]; then + ViashError "Input file '$VIASH_PAR_REFGENE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MINIMUM_COVERAGE" ]]; then + if ! [[ "$VIASH_PAR_MINIMUM_COVERAGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--minimum_coverage' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MINIMUM_COVERAGE -lt 1 ]]; then + ViashError '--minimum_coverage' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAMPLE_SIZE" ]]; then + if ! [[ "$VIASH_PAR_SAMPLE_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sample_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLE_SIZE -lt 1 ]]; then + ViashError '--sample_size' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SUBTRACT_BACKGROUND" ]]; then + if ! [[ "$VIASH_PAR_SUBTRACT_BACKGROUND" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--subtract_background' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_TIN_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_TIN_SUMMARY")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TIN" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_TIN")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_TIN")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_BAM_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM_INPUT")" ) + VIASH_PAR_BAM_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM_INPUT") +fi +if [ ! -z "$VIASH_PAR_BAI_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAI_INPUT")" ) + VIASH_PAR_BAI_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_BAI_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REFGENE")" ) + VIASH_PAR_REFGENE=$(ViashDockerAutodetectMount "$VIASH_PAR_REFGENE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_TIN_SUMMARY")" ) + VIASH_PAR_OUTPUT_TIN_SUMMARY=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_TIN_SUMMARY") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TIN" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_TIN")" ) + VIASH_PAR_OUTPUT_TIN=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_TIN") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_TIN" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_tin-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BAM_INPUT+x} ]; then echo "${VIASH_PAR_BAM_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_bam_input='&'#" ; else echo "# par_bam_input="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI_INPUT+x} ]; then echo "${VIASH_PAR_BAI_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_bai_input='&'#" ; else echo "# par_bai_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\"'\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_MINIMUM_COVERAGE+x} ]; then echo "${VIASH_PAR_MINIMUM_COVERAGE}" | sed "s#'#'\"'\"'#g;s#.*#par_minimum_coverage='&'#" ; else echo "# par_minimum_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBTRACT_BACKGROUND+x} ]; then echo "${VIASH_PAR_SUBTRACT_BACKGROUND}" | sed "s#'#'\"'\"'#g;s#.*#par_subtract_background='&'#" ; else echo "# par_subtract_background="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_TIN_SUMMARY+x} ]; then echo "${VIASH_PAR_OUTPUT_TIN_SUMMARY}" | sed "s#'#'\"'\"'#g;s#.*#par_output_tin_summary='&'#" ; else echo "# par_output_tin_summary="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_TIN+x} ]; then echo "${VIASH_PAR_OUTPUT_TIN}" | sed "s#'#'\"'\"'#g;s#.*#par_output_tin='&'#" ; else echo "# par_output_tin="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +bam_file="\$(basename -- \$par_bam_input)" +bai_file="\$(basename -- \$par_bai_input)" + +echo "\$bam_file" +echo "\$bai_file" + +tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\$tmpdir" +} + +cp \$par_bam_input \$tmpdir/\$bam_file +cp \$par_bai_input \$tmpdir/\$bai_file + +tin.py \\ + -i \$tmpdir/\$bam_file \\ + -r \$par_refgene \\ + -c \$par_minimum_coverage \\ + -n \$par_sample_size \\ + -s \$par_subtract_background + +[[ -f "\${bam_file%.*}.summary.txt" ]] && mv \${bam_file%.*}.summary.txt \$par_output_tin_summary +[[ -f "\${bam_file%.*}.tin.xls" ]] && mv \${bam_file%.*}.tin.xls \$par_output_tin + +clean_up +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_BAM_INPUT" ]; then + VIASH_PAR_BAM_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_BAM_INPUT") + fi + if [ ! -z "$VIASH_PAR_BAI_INPUT" ]; then + VIASH_PAR_BAI_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_BAI_INPUT") + fi + if [ ! -z "$VIASH_PAR_REFGENE" ]; then + VIASH_PAR_REFGENE=$(ViashDockerStripAutomount "$VIASH_PAR_REFGENE") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ]; then + VIASH_PAR_OUTPUT_TIN_SUMMARY=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_TIN_SUMMARY") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_TIN" ]; then + VIASH_PAR_OUTPUT_TIN=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_TIN") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ] && [ ! -e "$VIASH_PAR_OUTPUT_TIN_SUMMARY" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_TIN_SUMMARY' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TIN" ] && [ ! -e "$VIASH_PAR_OUTPUT_TIN" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_TIN' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/sortmerna/.config.vsh.yaml b/target/executable/sortmerna/.config.vsh.yaml new file mode 100644 index 0000000..2947adf --- /dev/null +++ b/target/executable/sortmerna/.config.vsh.yaml @@ -0,0 +1,232 @@ +name: "sortmerna" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "boolean" + name: "--paired" + description: "Are the reads single-end or paired-end" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + description: "Input fastq" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--sortmerna_log" + description: "Sortmerna log file." + info: null + default: + - "$id.sortmerna.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Output file for read 1." + info: null + default: + - "$id.$key.read_1.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Output file for read 2." + info: null + default: + - "$id.$key.read_2.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Local sequence alignment tool for filtering, mapping and clustering.\ + \ The main application of SortMeRNA is filtering rRNA from metatranscriptomic data.\ + \ SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and\ + \ one or multiple rRNA database file(s), and sorts apart aligned and rejected reads\ + \ into two files.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "SRR6357070_1.fastq.gz" +- type: "file" + path: "SRR6357070_2.fastq.gz" +- type: "file" + path: "rRNA" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/sortmerna/main.nf" + - "modules/nf-core/sortmerna/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/sortmerna/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/sortmerna" + executable: "target/executable/sortmerna/sortmerna" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/sortmerna/nextflow_labels.config b/target/executable/sortmerna/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/sortmerna/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/sortmerna/sortmerna b/target/executable/sortmerna/sortmerna new file mode 100755 index 0000000..7450f62 --- /dev/null +++ b/target/executable/sortmerna/sortmerna @@ -0,0 +1,1313 @@ +#!/usr/bin/env bash + +# sortmerna v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="sortmerna" +VIASH_META_FUNCTIONALITY_NAME="sortmerna" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0 +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component sortmerna" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "sortmerna v0.3.0" + echo "" + echo "Local sequence alignment tool for filtering, mapping and clustering. The main" + echo "application of SortMeRNA is filtering rRNA from metatranscriptomic data." + echo "SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and" + echo "one or multiple rRNA database file(s), and sorts apart aligned and rejected" + echo "reads into two files." + echo "" + echo "Input:" + echo " --paired" + echo " type: boolean" + echo " Are the reads single-end or paired-end" + echo "" + echo " --input" + echo " type: file, multiple values allowed, file must exist" + echo " Input fastq" + echo "" + echo " --ribo_database_manifest" + echo " type: file, multiple values allowed, file must exist" + echo " Text file containing paths to fasta files (one per line) that will be" + echo " used to create the database for SortMeRNA." + echo "" + echo "Output:" + echo " --sortmerna_log" + echo " type: file, output" + echo " default: \$id.sortmerna.log" + echo " Sortmerna log file." + echo "" + echo " --fastq_1" + echo " type: file, required parameter, output, file must exist" + echo " default: \$id.\$key.read_1.fastq.gz" + echo " Output file for read 1." + echo "" + echo " --fastq_2" + echo " type: file, output" + echo " default: \$id.\$key.read_2.fastq.gz" + echo " Output file for read 2." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "sortmerna v0.3.0" + exit + ;; + --paired) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --paired=*) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --ribo_database_manifest) + if [ -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then + VIASH_PAR_RIBO_DATABASE_MANIFEST="$2" + else + VIASH_PAR_RIBO_DATABASE_MANIFEST="$VIASH_PAR_RIBO_DATABASE_MANIFEST;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --ribo_database_manifest. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --ribo_database_manifest=*) + if [ -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then + VIASH_PAR_RIBO_DATABASE_MANIFEST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_RIBO_DATABASE_MANIFEST="$VIASH_PAR_RIBO_DATABASE_MANIFEST;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sortmerna_log) + [ -n "$VIASH_PAR_SORTMERNA_LOG" ] && ViashError Bad arguments for option \'--sortmerna_log\': \'$VIASH_PAR_SORTMERNA_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_LOG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sortmerna_log. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sortmerna_log=*) + [ -n "$VIASH_PAR_SORTMERNA_LOG" ] && ViashError Bad arguments for option \'--sortmerna_log=*\': \'$VIASH_PAR_SORTMERNA_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_LOG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_1) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_1=*) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1=*\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_2) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2=*) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2=*\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/sortmerna:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then + ViashError '--fastq_1' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_SORTMERNA_LOG+x} ]; then + VIASH_PAR_SORTMERNA_LOG="\$id.sortmerna.log" +fi +if [ -z ${VIASH_PAR_FASTQ_2+x} ]; then + VIASH_PAR_FASTQ_2="\$id.\$key.read_2.fastq.gz" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_RIBO_DATABASE_MANIFEST; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_PAIRED" ]]; then + if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_SORTMERNA_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_SORTMERNA_LOG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SORTMERNA_LOG")" +fi +if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQ_1")" +fi +if [ ! -z "$VIASH_PAR_FASTQ_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQ_2")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=',' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then + VIASH_TEST_RIBO_DATABASE_MANIFEST=() + IFS=';' + for var in $VIASH_PAR_RIBO_DATABASE_MANIFEST; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_RIBO_DATABASE_MANIFEST+=( "$var" ) + done + VIASH_PAR_RIBO_DATABASE_MANIFEST=$(IFS=';' ; echo "${VIASH_TEST_RIBO_DATABASE_MANIFEST[*]}") +fi +if [ ! -z "$VIASH_PAR_SORTMERNA_LOG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_SORTMERNA_LOG")" ) + VIASH_PAR_SORTMERNA_LOG=$(ViashDockerAutodetectMount "$VIASH_PAR_SORTMERNA_LOG") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_SORTMERNA_LOG" ) +fi +if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_1")" ) + VIASH_PAR_FASTQ_1=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_1") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_1" ) +fi +if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_2")" ) + VIASH_PAR_FASTQ_2=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_2") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_2" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-sortmerna-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_RIBO_DATABASE_MANIFEST+x} ]; then echo "${VIASH_PAR_RIBO_DATABASE_MANIFEST}" | sed "s#'#'\"'\"'#g;s#.*#par_ribo_database_manifest='&'#" ; else echo "# par_ribo_database_manifest="; fi ) +$( if [ ! -z ${VIASH_PAR_SORTMERNA_LOG+x} ]; then echo "${VIASH_PAR_SORTMERNA_LOG}" | sed "s#'#'\"'\"'#g;s#.*#par_sortmerna_log='&'#" ; else echo "# par_sortmerna_log="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +IFS="," read -ra input <<< "\$par_input" +IFS=";" read -ra paths <<< "\$par_ribo_database_manifest" +refs="" +for i in "\${paths[@]}" +do + refs+="-ref \$i " +done + +if [ "\$par_paired" == "false" ]; then + sortmerna \\ + \$refs \\ + -reads \${input[0]} \\ + --threads \${meta_cpus:-1} \\ + --workdir . \\ + --aligned rRNA_reads \\ + --fastx \\ + -num_alignments 1 \\ + --other non_rRNA_reads + mv non_rRNA_reads.f*q.gz "\$par_fastq_1" +else + sortmerna \\ + \$refs \\ + -reads \${input[0]} \\ + --reads \${input[1]} \\ + --threads \${meta_cpus:-1} \\ + --workdir . \\ + --aligned rRNA_reads \\ + --fastx \\ + --num_alignments 1 \\ + --other non_rRNA_reads \\ + --paired_in \\ + --out2 + mv non_rRNA_reads_fwd.f*q.gz \$par_fastq_1 + mv non_rRNA_reads_rev.f*q.gz \$par_fastq_2 +fi + +mv rRNA_reads.log \$par_sortmerna_log +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=',' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" + fi + if [ ! -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then + unset VIASH_TEST_RIBO_DATABASE_MANIFEST + IFS=';' + for var in $VIASH_PAR_RIBO_DATABASE_MANIFEST; do + unset IFS + if [ -z "$VIASH_TEST_RIBO_DATABASE_MANIFEST" ]; then + VIASH_TEST_RIBO_DATABASE_MANIFEST="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_RIBO_DATABASE_MANIFEST="$VIASH_TEST_RIBO_DATABASE_MANIFEST;""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_RIBO_DATABASE_MANIFEST="$VIASH_TEST_RIBO_DATABASE_MANIFEST" + fi + if [ ! -z "$VIASH_PAR_SORTMERNA_LOG" ]; then + VIASH_PAR_SORTMERNA_LOG=$(ViashDockerStripAutomount "$VIASH_PAR_SORTMERNA_LOG") + fi + if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then + VIASH_PAR_FASTQ_1=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_1") + fi + if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then + VIASH_PAR_FASTQ_2=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_2") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -e "$VIASH_PAR_FASTQ_1" ]; then + ViashError "Output file '$VIASH_PAR_FASTQ_1' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/stringtie/.config.vsh.yaml b/target/executable/stringtie/.config.vsh.yaml new file mode 100644 index 0000000..22c5f4c --- /dev/null +++ b/target/executable/stringtie/.config.vsh.yaml @@ -0,0 +1,256 @@ +name: "stringtie" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--strandedness" + description: "Forward or reverse strand?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--annotation_gtf" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_stringtie_args" + description: "Extra arguments for running StringTie" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--stringtie_ignore_gtf" + description: "Perform reference-guided de novo assembly of transcripts using StringTie,\ + \ i.e. don't restrict to those in GTF file." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--transcript_gtf" + info: null + default: + - "$id.$key.transcripts.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--coverage_gtf" + info: null + default: + - "$id.$key.coverage.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--abundance" + info: null + default: + - "$id.$key.abundance.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--ballgown" + description: "for running ballgown" + info: null + default: + - "$id.$key.ballgown" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Transcript assembly and quantification for RNA-Seq\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genes.gtf" +- type: "file" + path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/stringtie/stringtie/main.nf" + - "modules/nf-core/stringtie/stringtie/meta.yml" + last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential zlib1g wget && \\\n\ + wget --no-check-certificate https://github.com/gpertea/stringtie/releases/download/v2.2.1/stringtie-2.2.1.Linux_x86_64.tar.gz\ + \ && \\\ntar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \\\ncp stringtie-2.2.1.Linux_x86_64/stringtie\ + \ /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/stringtie/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/stringtie" + executable: "target/executable/stringtie/stringtie" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/stringtie/nextflow_labels.config b/target/executable/stringtie/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/stringtie/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/stringtie/stringtie b/target/executable/stringtie/stringtie new file mode 100755 index 0000000..6cf1f2f --- /dev/null +++ b/target/executable/stringtie/stringtie @@ -0,0 +1,1306 @@ +#!/usr/bin/env bash + +# stringtie v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="stringtie" +VIASH_META_FUNCTIONALITY_NAME="stringtie" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ +apt-get install -y build-essential zlib1g wget && \ +wget --no-check-certificate https://github.com/gpertea/stringtie/releases/download/v2.2.1/stringtie-2.2.1.Linux_x86_64.tar.gz && \ +tar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \ +cp stringtie-2.2.1.Linux_x86_64/stringtie /usr/local/bin/ + +LABEL org.opencontainers.image.description="Companion container for running component stringtie" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "stringtie v0.3.0" + echo "" + echo "Transcript assembly and quantification for RNA-Seq" + echo "" + echo "Input:" + echo " --strandedness" + echo " type: string" + echo " Forward or reverse strand?" + echo "" + echo " --bam" + echo " type: file, file must exist" + echo "" + echo " --annotation_gtf" + echo " type: file, file must exist" + echo "" + echo " --extra_stringtie_args" + echo " type: string" + echo " Extra arguments for running StringTie" + echo "" + echo " --stringtie_ignore_gtf" + echo " type: boolean" + echo " Perform reference-guided de novo assembly of transcripts using" + echo " StringTie, i.e. don't restrict to those in GTF file." + echo "" + echo "Output:" + echo " --transcript_gtf" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.transcripts.gtf" + echo "" + echo " --coverage_gtf" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.coverage.gtf" + echo "" + echo " --abundance" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.abundance.txt" + echo "" + echo " --ballgown" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.ballgown" + echo " for running ballgown" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "stringtie v0.3.0" + exit + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam=*) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam=*\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --annotation_gtf) + [ -n "$VIASH_PAR_ANNOTATION_GTF" ] && ViashError Bad arguments for option \'--annotation_gtf\': \'$VIASH_PAR_ANNOTATION_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ANNOTATION_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --annotation_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --annotation_gtf=*) + [ -n "$VIASH_PAR_ANNOTATION_GTF" ] && ViashError Bad arguments for option \'--annotation_gtf=*\': \'$VIASH_PAR_ANNOTATION_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ANNOTATION_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_stringtie_args) + [ -n "$VIASH_PAR_EXTRA_STRINGTIE_ARGS" ] && ViashError Bad arguments for option \'--extra_stringtie_args\': \'$VIASH_PAR_EXTRA_STRINGTIE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_STRINGTIE_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_stringtie_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_stringtie_args=*) + [ -n "$VIASH_PAR_EXTRA_STRINGTIE_ARGS" ] && ViashError Bad arguments for option \'--extra_stringtie_args=*\': \'$VIASH_PAR_EXTRA_STRINGTIE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_STRINGTIE_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_ignore_gtf) + [ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ] && ViashError Bad arguments for option \'--stringtie_ignore_gtf\': \'$VIASH_PAR_STRINGTIE_IGNORE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_IGNORE_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_ignore_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_ignore_gtf=*) + [ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ] && ViashError Bad arguments for option \'--stringtie_ignore_gtf=*\': \'$VIASH_PAR_STRINGTIE_IGNORE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_IGNORE_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcript_gtf) + [ -n "$VIASH_PAR_TRANSCRIPT_GTF" ] && ViashError Bad arguments for option \'--transcript_gtf\': \'$VIASH_PAR_TRANSCRIPT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_gtf=*) + [ -n "$VIASH_PAR_TRANSCRIPT_GTF" ] && ViashError Bad arguments for option \'--transcript_gtf=*\': \'$VIASH_PAR_TRANSCRIPT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --coverage_gtf) + [ -n "$VIASH_PAR_COVERAGE_GTF" ] && ViashError Bad arguments for option \'--coverage_gtf\': \'$VIASH_PAR_COVERAGE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COVERAGE_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --coverage_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --coverage_gtf=*) + [ -n "$VIASH_PAR_COVERAGE_GTF" ] && ViashError Bad arguments for option \'--coverage_gtf=*\': \'$VIASH_PAR_COVERAGE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COVERAGE_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --abundance) + [ -n "$VIASH_PAR_ABUNDANCE" ] && ViashError Bad arguments for option \'--abundance\': \'$VIASH_PAR_ABUNDANCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ABUNDANCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --abundance. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --abundance=*) + [ -n "$VIASH_PAR_ABUNDANCE" ] && ViashError Bad arguments for option \'--abundance=*\': \'$VIASH_PAR_ABUNDANCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ABUNDANCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --ballgown) + [ -n "$VIASH_PAR_BALLGOWN" ] && ViashError Bad arguments for option \'--ballgown\': \'$VIASH_PAR_BALLGOWN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BALLGOWN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --ballgown. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --ballgown=*) + [ -n "$VIASH_PAR_BALLGOWN" ] && ViashError Bad arguments for option \'--ballgown=*\': \'$VIASH_PAR_BALLGOWN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BALLGOWN=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/stringtie:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_TRANSCRIPT_GTF+x} ]; then + VIASH_PAR_TRANSCRIPT_GTF="\$id.\$key.transcripts.gtf" +fi +if [ -z ${VIASH_PAR_COVERAGE_GTF+x} ]; then + VIASH_PAR_COVERAGE_GTF="\$id.\$key.coverage.gtf" +fi +if [ -z ${VIASH_PAR_ABUNDANCE+x} ]; then + VIASH_PAR_ABUNDANCE="\$id.\$key.abundance.txt" +fi +if [ -z ${VIASH_PAR_BALLGOWN+x} ]; then + VIASH_PAR_BALLGOWN="\$id.\$key.ballgown" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -e "$VIASH_PAR_BAM" ]; then + ViashError "Input file '$VIASH_PAR_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ANNOTATION_GTF" ] && [ ! -e "$VIASH_PAR_ANNOTATION_GTF" ]; then + ViashError "Input file '$VIASH_PAR_ANNOTATION_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ]]; then + if ! [[ "$VIASH_PAR_STRINGTIE_IGNORE_GTF" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--stringtie_ignore_gtf' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_TRANSCRIPT_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPT_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPT_GTF")" +fi +if [ ! -z "$VIASH_PAR_COVERAGE_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_COVERAGE_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COVERAGE_GTF")" +fi +if [ ! -z "$VIASH_PAR_ABUNDANCE" ] && [ ! -d "$(dirname "$VIASH_PAR_ABUNDANCE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_ABUNDANCE")" +fi +if [ ! -z "$VIASH_PAR_BALLGOWN" ] && [ ! -d "$(dirname "$VIASH_PAR_BALLGOWN")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BALLGOWN")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_BAM" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM")" ) + VIASH_PAR_BAM=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM") +fi +if [ ! -z "$VIASH_PAR_ANNOTATION_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_ANNOTATION_GTF")" ) + VIASH_PAR_ANNOTATION_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_ANNOTATION_GTF") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRANSCRIPT_GTF")" ) + VIASH_PAR_TRANSCRIPT_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_TRANSCRIPT_GTF") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRANSCRIPT_GTF" ) +fi +if [ ! -z "$VIASH_PAR_COVERAGE_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COVERAGE_GTF")" ) + VIASH_PAR_COVERAGE_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_COVERAGE_GTF") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_COVERAGE_GTF" ) +fi +if [ ! -z "$VIASH_PAR_ABUNDANCE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_ABUNDANCE")" ) + VIASH_PAR_ABUNDANCE=$(ViashDockerAutodetectMount "$VIASH_PAR_ABUNDANCE") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_ABUNDANCE" ) +fi +if [ ! -z "$VIASH_PAR_BALLGOWN" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BALLGOWN")" ) + VIASH_PAR_BALLGOWN=$(ViashDockerAutodetectMount "$VIASH_PAR_BALLGOWN") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BALLGOWN" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-stringtie-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\"'\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_ANNOTATION_GTF+x} ]; then echo "${VIASH_PAR_ANNOTATION_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_annotation_gtf='&'#" ; else echo "# par_annotation_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_STRINGTIE_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_STRINGTIE_ARGS}" | sed "s#'#'\"'\"'#g;s#.*#par_extra_stringtie_args='&'#" ; else echo "# par_extra_stringtie_args="; fi ) +$( if [ ! -z ${VIASH_PAR_STRINGTIE_IGNORE_GTF+x} ]; then echo "${VIASH_PAR_STRINGTIE_IGNORE_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_stringtie_ignore_gtf='&'#" ; else echo "# par_stringtie_ignore_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPT_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPT_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_transcript_gtf='&'#" ; else echo "# par_transcript_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_COVERAGE_GTF+x} ]; then echo "${VIASH_PAR_COVERAGE_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_coverage_gtf='&'#" ; else echo "# par_coverage_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_ABUNDANCE+x} ]; then echo "${VIASH_PAR_ABUNDANCE}" | sed "s#'#'\"'\"'#g;s#.*#par_abundance='&'#" ; else echo "# par_abundance="; fi ) +$( if [ ! -z ${VIASH_PAR_BALLGOWN+x} ]; then echo "${VIASH_PAR_BALLGOWN}" | sed "s#'#'\"'\"'#g;s#.*#par_ballgown='&'#" ; else echo "# par_ballgown="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +if [ \$par_strandedness=='forward' ]; then + strand='--fr' +elif [ \$par_strandedness=='reverse' ]; then + strand='--rf' +fi + +stringtie \\ + \$par_bam \\ + \$strand \\ + \${par_annotation_gtf:+-G \$par_annotation_gtf} \\ + -o \$par_transcript_gtf \\ + -A "abundance.txt" \\ + \${par_annotation_gtf:+-C "coverage.gtf"} \\ + \${par_annotation_gtf:+-b "ballgown"} \\ + -p \${meta_cpus:-1} \\ + \$par_extra_stringtie_args \\ + \${par_stringtie_ignore_gtf:+-e} + +mv coverage.gtf \$par_coverage_gtf +mv ballgown \$par_ballgown +mv abundance.txt \$par_abundance +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_BAM" ]; then + VIASH_PAR_BAM=$(ViashDockerStripAutomount "$VIASH_PAR_BAM") + fi + if [ ! -z "$VIASH_PAR_ANNOTATION_GTF" ]; then + VIASH_PAR_ANNOTATION_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_ANNOTATION_GTF") + fi + if [ ! -z "$VIASH_PAR_TRANSCRIPT_GTF" ]; then + VIASH_PAR_TRANSCRIPT_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_TRANSCRIPT_GTF") + fi + if [ ! -z "$VIASH_PAR_COVERAGE_GTF" ]; then + VIASH_PAR_COVERAGE_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_COVERAGE_GTF") + fi + if [ ! -z "$VIASH_PAR_ABUNDANCE" ]; then + VIASH_PAR_ABUNDANCE=$(ViashDockerStripAutomount "$VIASH_PAR_ABUNDANCE") + fi + if [ ! -z "$VIASH_PAR_BALLGOWN" ]; then + VIASH_PAR_BALLGOWN=$(ViashDockerStripAutomount "$VIASH_PAR_BALLGOWN") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_TRANSCRIPT_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_GTF" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPT_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COVERAGE_GTF" ] && [ ! -e "$VIASH_PAR_COVERAGE_GTF" ]; then + ViashError "Output file '$VIASH_PAR_COVERAGE_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ABUNDANCE" ] && [ ! -e "$VIASH_PAR_ABUNDANCE" ]; then + ViashError "Output file '$VIASH_PAR_ABUNDANCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BALLGOWN" ] && [ ! -e "$VIASH_PAR_BALLGOWN" ]; then + ViashError "Output file '$VIASH_PAR_BALLGOWN' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/summarizedexperiment/.config.vsh.yaml b/target/executable/summarizedexperiment/.config.vsh.yaml new file mode 100644 index 0000000..f523478 --- /dev/null +++ b/target/executable/summarizedexperiment/.config.vsh.yaml @@ -0,0 +1,235 @@ +name: "summarizedexperiment" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--tpm_gene" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tx2gene_tsv" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "summarizedexperiment.r" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Create SummarizedExperiment object from Salmon counts" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/summarizedexperiment/main.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "rocker/r2u:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "r" + bioc: + - "SummarizedExperiment" + - "tximeta" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/summarizedexperiment/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/summarizedexperiment" + executable: "target/executable/summarizedexperiment/summarizedexperiment" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/summarizedexperiment/nextflow_labels.config b/target/executable/summarizedexperiment/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/summarizedexperiment/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/summarizedexperiment/summarizedexperiment b/target/executable/summarizedexperiment/summarizedexperiment new file mode 100755 index 0000000..7731ed7 --- /dev/null +++ b/target/executable/summarizedexperiment/summarizedexperiment @@ -0,0 +1,1269 @@ +#!/usr/bin/env bash + +# summarizedexperiment v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="summarizedexperiment" +VIASH_META_FUNCTIONALITY_NAME="summarizedexperiment" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM rocker/r2u:22.04 +ENTRYPOINT [] +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) BiocManager::install("SummarizedExperiment")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("tximeta", quietly = TRUE)) BiocManager::install("tximeta")' + +LABEL org.opencontainers.image.description="Companion container for running component summarizedexperiment" +LABEL org.opencontainers.image.created="2025-05-07T12:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "summarizedexperiment v0.3.0" + echo "" + echo "Create SummarizedExperiment object from Salmon counts" + echo "" + echo "Input:" + echo " --tpm_gene" + echo " type: file, file must exist" + echo "" + echo " --counts_gene" + echo " type: file, file must exist" + echo "" + echo " --counts_gene_length_scaled" + echo " type: file, file must exist" + echo "" + echo " --counts_gene_scaled" + echo " type: file, file must exist" + echo "" + echo " --tpm_transcript" + echo " type: file, file must exist" + echo "" + echo " --counts_transcript" + echo " type: file, file must exist" + echo "" + echo " --tx2gene_tsv" + echo " type: file, file must exist" + echo "" + echo "Output:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "summarizedexperiment v0.3.0" + exit + ;; + --tpm_gene) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_gene=*) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene=*\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene=*) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_length_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_length_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_transcript) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_transcript=*) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript=*\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_transcript) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_transcript=*) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tx2gene_tsv) + [ -n "$VIASH_PAR_TX2GENE_TSV" ] && ViashError Bad arguments for option \'--tx2gene_tsv\': \'$VIASH_PAR_TX2GENE_TSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TX2GENE_TSV="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tx2gene_tsv. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tx2gene_tsv=*) + [ -n "$VIASH_PAR_TX2GENE_TSV" ] && ViashError Bad arguments for option \'--tx2gene_tsv=*\': \'$VIASH_PAR_TX2GENE_TSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TX2GENE_TSV=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/summarizedexperiment:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="merged_summarizedexperiment" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -e "$VIASH_PAR_TPM_GENE" ]; then + ViashError "Input file '$VIASH_PAR_TPM_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE" ]; then + ViashError "Input file '$VIASH_PAR_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + ViashError "Input file '$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + ViashError "Input file '$VIASH_PAR_COUNTS_GENE_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + ViashError "Input file '$VIASH_PAR_TPM_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + ViashError "Input file '$VIASH_PAR_COUNTS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TX2GENE_TSV" ] && [ ! -e "$VIASH_PAR_TX2GENE_TSV" ]; then + ViashError "Input file '$VIASH_PAR_TX2GENE_TSV' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_TPM_GENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TPM_GENE")" ) + VIASH_PAR_TPM_GENE=$(ViashDockerAutodetectMount "$VIASH_PAR_TPM_GENE") +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE")" ) + VIASH_PAR_COUNTS_GENE=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE") +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" ) + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED") +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE_SCALED")" ) + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE_SCALED") +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TPM_TRANSCRIPT")" ) + VIASH_PAR_TPM_TRANSCRIPT=$(ViashDockerAutodetectMount "$VIASH_PAR_TPM_TRANSCRIPT") +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_TRANSCRIPT")" ) + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_TRANSCRIPT") +fi +if [ ! -z "$VIASH_PAR_TX2GENE_TSV" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TX2GENE_TSV")" ) + VIASH_PAR_TX2GENE_TSV=$(ViashDockerAutodetectMount "$VIASH_PAR_TX2GENE_TSV") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-summarizedexperiment-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_TPM_GENE+x} ]; then echo "${VIASH_PAR_TPM_GENE}" | sed "s#'#'\"'\"'#g;s#.*#par_tpm_gene='&'#" ; else echo "# par_tpm_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene_length_scaled='&'#" ; else echo "# par_counts_gene_length_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_SCALED}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene_scaled='&'#" ; else echo "# par_counts_gene_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_TPM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_TPM_TRANSCRIPT}" | sed "s#'#'\"'\"'#g;s#.*#par_tpm_transcript='&'#" ; else echo "# par_tpm_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPT}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_transcript='&'#" ; else echo "# par_counts_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_TX2GENE_TSV+x} ]; then echo "${VIASH_PAR_TX2GENE_TSV}" | sed "s#'#'\"'\"'#g;s#.*#par_tx2gene_tsv='&'#" ; else echo "# par_tx2gene_tsv="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \$par_output + +Rscript "\$meta_resources_dir/summarizedexperiment.r" NULL \$par_counts_gene \$par_tpm_gene \$par_tx2gene_tsv + +Rscript "\$meta_resources_dir/summarizedexperiment.r" NULL \$par_counts_gene_length_scaled \$par_tpm_gene \$par_tx2gene_tsv + +Rscript "\$meta_resources_dir/summarizedexperiment.r" NULL \$par_counts_gene_scaled \$par_tpm_gene \$par_tx2gene_tsv + +Rscript "\$meta_resources_dir/summarizedexperiment.r" NULL \$par_counts_transcript \$par_tpm_transcript \$par_tx2gene_tsv + +mv \${par_counts_gene%.*}.rds \$par_output/ +mv \${par_counts_gene_length_scaled%.*}.rds \$par_output/ +mv \${par_counts_gene_scaled%.*}.rds \$par_output/ +mv \${par_counts_transcript%.*}.rds \$par_output/ +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_TPM_GENE" ]; then + VIASH_PAR_TPM_GENE=$(ViashDockerStripAutomount "$VIASH_PAR_TPM_GENE") + fi + if [ ! -z "$VIASH_PAR_COUNTS_GENE" ]; then + VIASH_PAR_COUNTS_GENE=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE") + fi + if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED") + fi + if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE_SCALED") + fi + if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + VIASH_PAR_TPM_TRANSCRIPT=$(ViashDockerStripAutomount "$VIASH_PAR_TPM_TRANSCRIPT") + fi + if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_TRANSCRIPT") + fi + if [ ! -z "$VIASH_PAR_TX2GENE_TSV" ]; then + VIASH_PAR_TX2GENE_TSV=$(ViashDockerStripAutomount "$VIASH_PAR_TX2GENE_TSV") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/summarizedexperiment/summarizedexperiment.r b/target/executable/summarizedexperiment/summarizedexperiment.r new file mode 100755 index 0000000..5d656a7 --- /dev/null +++ b/target/executable/summarizedexperiment/summarizedexperiment.r @@ -0,0 +1,68 @@ +#!/usr/bin/env Rscript + +library(SummarizedExperiment) + +## Create SummarizedExperiment (se) object from Salmon counts + +args <- commandArgs(trailingOnly = TRUE) +if (length(args) < 2) { + stop("Usage: salmon_se.r ", call. = FALSE) +} + +coldata <- args[1] +counts_fn <- args[2] +tpm_fn <- args[3] +tx2gene <- args[4] + +info <- file.info(tx2gene) +if (info$size == 0) { + tx2gene <- NULL +} else { + rowdata <- read.csv(tx2gene, sep = "\t", header = FALSE) + colnames(rowdata) <- c("tx", "gene_id", "gene_name") + tx2gene <- rowdata[, 1:2] +} + +counts <- read.csv(counts_fn, row.names = 1, sep = "\t") +counts <- counts[, 2:ncol(counts), drop = FALSE] # remove gene_name column +tpm <- read.csv(tpm_fn, row.names = 1, sep = "\t") +tpm <- tpm[, 2:ncol(tpm), drop = FALSE] # remove gene_name column + +if (length(intersect(rownames(counts), rowdata[["tx"]])) > length(intersect(rownames(counts), rowdata[["gene_id"]]))) { + by_what <- "tx" +} else { + by_what <- "gene_id" + rowdata <- unique(rowdata[, 2:3]) +} + +if (file.exists(coldata)) { + coldata <- read.csv(coldata, sep = "\t") + coldata <- coldata[match(colnames(counts), coldata[, 1]), ] + coldata <- cbind(files = fns, coldata) +} else { + message("ColData not avaliable ", coldata) + coldata <- data.frame(files = colnames(counts), names = colnames(counts)) +} + +rownames(coldata) <- coldata[["names"]] +extra <- setdiff(rownames(counts), as.character(rowdata[[by_what]])) +if (length(extra) > 0) { + rowdata <- rbind( + rowdata, + data.frame( + tx = extra, + gene_id = extra, + gene_name = extra + )[, colnames(rowdata)] + ) +} + +rowdata <- rowdata[match(rownames(counts), as.character(rowdata[[by_what]])), ] +rownames(rowdata) <- rowdata[[by_what]] +se <- SummarizedExperiment( + assays = list(counts = counts, abundance = tpm), + colData = DataFrame(coldata), + rowData = rowdata +) + +saveRDS(se, file = paste0(tools::file_path_sans_ext(counts_fn), ".rds")) diff --git a/target/executable/tx2gene/.config.vsh.yaml b/target/executable/tx2gene/.config.vsh.yaml new file mode 100644 index 0000000..22b4423 --- /dev/null +++ b/target/executable/tx2gene/.config.vsh.yaml @@ -0,0 +1,225 @@ +name: "tx2gene" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--gtf" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quant_type" + description: "Method used for quantification" + info: null + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--tsv" + info: null + default: + - "tx2gene.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "tx2gene.py" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Get transcript id (tx) to gene names for tximport" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/tx2gene/main.nf" + last_sha: "839ac5cab892504514cc96d44e99e70516b239d2" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "pip" + - "unzip" + interactive: false + - type: "python" + user: false + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/tx2gene/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/tx2gene" + executable: "target/executable/tx2gene/tx2gene" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/tx2gene/nextflow_labels.config b/target/executable/tx2gene/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/tx2gene/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/tx2gene/tx2gene b/target/executable/tx2gene/tx2gene new file mode 100755 index 0000000..df9ca61 --- /dev/null +++ b/target/executable/tx2gene/tx2gene @@ -0,0 +1,1246 @@ +#!/usr/bin/env bash + +# tx2gene v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="tx2gene" +VIASH_META_FUNCTIONALITY_NAME="tx2gene" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y pip unzip && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip + +LABEL org.opencontainers.image.description="Companion container for running component tx2gene" +LABEL org.opencontainers.image.created="2025-05-07T12:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "tx2gene v0.3.0" + echo "" + echo "Get transcript id (tx) to gene names for tximport" + echo "" + echo "Input:" + echo " --quant_results" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo "" + echo " --gtf_extra_attributes" + echo " type: string" + echo " default: gene_name" + echo "" + echo " --gtf_group_features" + echo " type: string" + echo " default: gene_id" + echo "" + echo " --quant_type" + echo " type: string" + echo " choices: [ salmon, kallisto ]" + echo " Method used for quantification" + echo "" + echo "Output:" + echo " --tsv" + echo " type: file, output, file must exist" + echo " default: tx2gene.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "tx2gene v0.3.0" + exit + ;; + --quant_results) + if [ -z "$VIASH_PAR_QUANT_RESULTS" ]; then + VIASH_PAR_QUANT_RESULTS="$2" + else + VIASH_PAR_QUANT_RESULTS="$VIASH_PAR_QUANT_RESULTS;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_results. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_results=*) + if [ -z "$VIASH_PAR_QUANT_RESULTS" ]; then + VIASH_PAR_QUANT_RESULTS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_QUANT_RESULTS="$VIASH_PAR_QUANT_RESULTS;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_extra_attributes) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_extra_attributes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_extra_attributes=*) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes=*\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_group_features) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_group_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_group_features=*) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features=*\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_type) + [ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_type=*) + [ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type=*\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tsv) + [ -n "$VIASH_PAR_TSV" ] && ViashError Bad arguments for option \'--tsv\': \'$VIASH_PAR_TSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TSV="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tsv. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tsv=*) + [ -n "$VIASH_PAR_TSV" ] && ViashError Bad arguments for option \'--tsv=*\': \'$VIASH_PAR_TSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TSV=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/tx2gene:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_GTF_EXTRA_ATTRIBUTES+x} ]; then + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="gene_name" +fi +if [ -z ${VIASH_PAR_GTF_GROUP_FEATURES+x} ]; then + VIASH_PAR_GTF_GROUP_FEATURES="gene_id" +fi +if [ -z ${VIASH_PAR_TSV+x} ]; then + VIASH_PAR_TSV="tx2gene.tsv" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_QUANT_RESULTS" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_QUANT_RESULTS; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_QUANT_TYPE" ]; then + VIASH_PAR_QUANT_TYPE_CHOICES=("salmon;kallisto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_QUANT_TYPE_CHOICES[*]};" =~ ";$VIASH_PAR_QUANT_TYPE;" ]]; then + ViashError '--quant_type' specified value of \'$VIASH_PAR_QUANT_TYPE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_TSV" ] && [ ! -d "$(dirname "$VIASH_PAR_TSV")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TSV")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_QUANT_RESULTS" ]; then + VIASH_TEST_QUANT_RESULTS=() + IFS=';' + for var in $VIASH_PAR_QUANT_RESULTS; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_QUANT_RESULTS+=( "$var" ) + done + VIASH_PAR_QUANT_RESULTS=$(IFS=';' ; echo "${VIASH_TEST_QUANT_RESULTS[*]}") +fi +if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF")" ) + VIASH_PAR_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF") +fi +if [ ! -z "$VIASH_PAR_TSV" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TSV")" ) + VIASH_PAR_TSV=$(ViashDockerAutodetectMount "$VIASH_PAR_TSV") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_TSV" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-tx2gene-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS}" | sed "s#'#'\"'\"'#g;s#.*#par_quant_results='&'#" ; else echo "# par_quant_results="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF_EXTRA_ATTRIBUTES+x} ]; then echo "${VIASH_PAR_GTF_EXTRA_ATTRIBUTES}" | sed "s#'#'\"'\"'#g;s#.*#par_gtf_extra_attributes='&'#" ; else echo "# par_gtf_extra_attributes="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF_GROUP_FEATURES+x} ]; then echo "${VIASH_PAR_GTF_GROUP_FEATURES}" | sed "s#'#'\"'\"'#g;s#.*#par_gtf_group_features='&'#" ; else echo "# par_gtf_group_features="; fi ) +$( if [ ! -z ${VIASH_PAR_QUANT_TYPE+x} ]; then echo "${VIASH_PAR_QUANT_TYPE}" | sed "s#'#'\"'\"'#g;s#.*#par_quant_type='&'#" ; else echo "# par_quant_type="; fi ) +$( if [ ! -z ${VIASH_PAR_TSV+x} ]; then echo "${VIASH_PAR_TSV}" | sed "s#'#'\"'\"'#g;s#.*#par_tsv='&'#" ; else echo "# par_tsv="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +function clean_up { + rm -rf "\$tmpdir" +} +trap clean_up EXIT + +tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_name-XXXXXXXX") + +IFS=";" read -ra results <<< \$par_quant_results +for result in \${results[*]} +do + cp -r \$result \$tmpdir +done + +python3 "\$meta_resources_dir/tx2gene.py" \\ + --quant_type \$par_quant_type \\ + --gtf \$par_gtf \\ + --quants \$tmpdir \\ + --id \$par_gtf_group_features \\ + --extra \$par_gtf_extra_attributes \\ + -o \$par_tsv +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_QUANT_RESULTS" ]; then + unset VIASH_TEST_QUANT_RESULTS + IFS=';' + for var in $VIASH_PAR_QUANT_RESULTS; do + unset IFS + if [ -z "$VIASH_TEST_QUANT_RESULTS" ]; then + VIASH_TEST_QUANT_RESULTS="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_QUANT_RESULTS="$VIASH_TEST_QUANT_RESULTS;""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_QUANT_RESULTS="$VIASH_TEST_QUANT_RESULTS" + fi + if [ ! -z "$VIASH_PAR_GTF" ]; then + VIASH_PAR_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_GTF") + fi + if [ ! -z "$VIASH_PAR_TSV" ]; then + VIASH_PAR_TSV=$(ViashDockerStripAutomount "$VIASH_PAR_TSV") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_TSV" ] && [ ! -e "$VIASH_PAR_TSV" ]; then + ViashError "Output file '$VIASH_PAR_TSV' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/tx2gene/tx2gene.py b/target/executable/tx2gene/tx2gene.py new file mode 100755 index 0000000..dd50079 --- /dev/null +++ b/target/executable/tx2gene/tx2gene.py @@ -0,0 +1,169 @@ + +#!/usr/bin/env python + +# Written by Lorena Pantano with subsequent reworking by Jonathan Manning. Released under the MIT license. + +import logging +import argparse +import glob +import os +import re +from collections import Counter, defaultdict, OrderedDict +from collections.abc import Set +from typing import Dict + +# Configure logging +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]: + """ + Read the top 100 transcripts from the quantification file. + + Parameters: + quant_dir (str): Directory where quantification files are located. + file_pattern (str): Pattern to match quantification files. + + Returns: + set: A set containing the top 100 transcripts. + """ + try: + # Find the quantification file within the directory + quant_file_path = glob.glob(os.path.join(quant_dir, file_pattern))[0] + with open(quant_file_path, "r") as file_handle: + # Read the file and extract the top 100 transcripts + return {line.split()[0] for i, line in enumerate(file_handle) if i > 0 and i <= 100} + except IndexError: + # Log an error and raise a FileNotFoundError if the quant file does not exist + logger.error("No quantification files found.") + raise FileNotFoundError("Quantification file not found.") + + +def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str: + """ + Discover the attribute in the GTF that corresponds to transcripts, prioritizing 'transcript_id'. + + Parameters: + gtf_file (str): Path to the GTF file. + transcripts (Set[str]): A set of transcripts to match in the GTF file. + + Returns: + str: The attribute name that corresponds to transcripts in the GTF file. + """ + + votes = Counter() + with open(gtf_file) as inh: + # Read GTF file, skipping header lines + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\t") + + # Use regular expression to correctly split the attributes string + attributes_str = cols[8] + attributes = dict(re.findall(r'(\S+) "(.*?)(? Dict[str, str]: + """ + Parse the attributes column of a GTF file. + + :param attributes_text: The attributes column as a string. + :return: A dictionary of the attributes. + """ + # Split the attributes string by semicolon and strip whitespace + attributes = attributes_text.strip().split(";") + attr_dict = OrderedDict() + + # Iterate over each attribute pair + for attribute in attributes: + # Split the attribute into key and value, ensuring there are two parts + parts = attribute.strip().split(" ", 1) + if len(parts) == 2: + key, value = parts + # Remove any double quotes from the value + value = value.replace('"', "") + attr_dict[key] = value + + return attr_dict + + +def map_transcripts_to_gene( + quant_type: str, gtf_file: str, quant_dir: str, gene_id: str, extra_id_field: str, output_file: str +) -> bool: + """ + Map transcripts to gene names and write the output to a file. + + Parameters: + quant_type (str): The quantification method used (e.g., 'salmon'). + gtf_file (str): Path to the GTF file. + quant_dir (str): Directory where quantification files are located. + gene_id (str): The gene ID attribute in the GTF file. + extra_id_field (str): Additional ID field in the GTF file. + output_file (str): The output file path. + + Returns: + bool: True if the operation was successful, False otherwise. + """ + # Read the top transcripts based on quantification type + transcripts = read_top_transcripts(quant_dir, "*quant_results.sf" if quant_type == "salmon" else "*abundance.tsv") + # Discover the attribute that corresponds to transcripts in the GTF + transcript_attribute = discover_transcript_attribute(gtf_file, transcripts) + + if not transcript_attribute: + # If no attribute is found, return False + return False + + # Open GTF and output file to write the mappings + # Initialize the set to track seen combinations + seen = set() + + with open(gtf_file) as inh, open(output_file, "w") as output_handle: + # Parse each line of the GTF, mapping transcripts to genes + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\t") + attr_dict = parse_attributes(cols[8]) + if gene_id in attr_dict and transcript_attribute in attr_dict: + # Create a unique identifier for the transcript-gene combination + transcript_gene_pair = (attr_dict[transcript_attribute], attr_dict[gene_id]) + + # Check if the combination has already been seen + if transcript_gene_pair not in seen: + # If it's a new combination, write it to the output and add to the seen set + extra_id = attr_dict.get(extra_id_field, attr_dict[gene_id]) + output_handle.write(f"{attr_dict[transcript_attribute]}\t{attr_dict[gene_id]}\t{extra_id}\n") + seen.add(transcript_gene_pair) + + return True + + +# Main function to parse arguments and call the mapping function +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Map transcripts to gene names for tximport.") + parser.add_argument("--quant_type", type=str, help="Quantification type", default="salmon") + parser.add_argument("--gtf", type=str, help="GTF file", required=True) + parser.add_argument("--quants", type=str, help="Output of quantification", required=True) + parser.add_argument("--id", type=str, help="Gene ID in the GTF file", required=True) + parser.add_argument("--extra", type=str, help="Extra ID in the GTF file") + parser.add_argument("-o", "--output", dest="output", default="tx2gene.tsv", type=str, help="File with output") + + args = parser.parse_args() + if not map_transcripts_to_gene(args.quant_type, args.gtf, args.quants, args.id, args.extra, args.output): + logger.error("Failed to map transcripts to genes.") diff --git a/target/executable/tximport/.config.vsh.yaml b/target/executable/tximport/.config.vsh.yaml new file mode 100644 index 0000000..fd7a658 --- /dev/null +++ b/target/executable/tximport/.config.vsh.yaml @@ -0,0 +1,292 @@ +name: "tximport" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--tx2gene_tsv" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quant_type" + description: "Method used for quantification" + info: null + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--tpm_gene" + info: null + default: + - "merged.gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + default: + - "merged.gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + default: + - "merged.gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + default: + - "merged.gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_gene" + info: null + default: + - "merged.gene_lengths.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + default: + - "merged.transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + default: + - "merged.transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_transcript" + info: null + default: + - "merged.transcript_lengths.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "tximport.r" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Get dataframe linking transcript ID, gene ID, and gene name" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/tximport/main.nf" + last_sha: "489bcb4efdc7bd58839b22b0360d26b4d80b87a8" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "r-base" + - "libcurl4-openssl-dev" + - "libssl-dev" + - "libxml2-dev" + interactive: false + - type: "r" + cran: + - "jsonlite" + bioc: + - "SummarizedExperiment" + - "tximport" + - "tximeta" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/tximport/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/tximport" + executable: "target/executable/tximport/tximport" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/tximport/nextflow_labels.config b/target/executable/tximport/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/tximport/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/tximport/tximport b/target/executable/tximport/tximport new file mode 100755 index 0000000..b3b8fe7 --- /dev/null +++ b/target/executable/tximport/tximport @@ -0,0 +1,1450 @@ +#!/usr/bin/env bash + +# tximport v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="tximport" +VIASH_META_FUNCTIONALITY_NAME="tximport" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y r-base libcurl4-openssl-dev libssl-dev libxml2-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) BiocManager::install("SummarizedExperiment")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("tximport", quietly = TRUE)) BiocManager::install("tximport")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("tximeta", quietly = TRUE)) BiocManager::install("tximeta")' && \ + Rscript -e 'options(warn = 2); remotes::install_cran(c("jsonlite"), repos = "https://cran.rstudio.com")' + +LABEL org.opencontainers.image.description="Companion container for running component tximport" +LABEL org.opencontainers.image.created="2025-05-07T12:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "tximport v0.3.0" + echo "" + echo "Get dataframe linking transcript ID, gene ID, and gene name" + echo "" + echo "Input:" + echo " --quant_results" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --tx2gene_tsv" + echo " type: file, file must exist" + echo "" + echo " --quant_type" + echo " type: string" + echo " choices: [ salmon, kallisto ]" + echo " Method used for quantification" + echo "" + echo "Output:" + echo " --tpm_gene" + echo " type: file, output, file must exist" + echo " default: merged.gene_tpm.tsv" + echo "" + echo " --counts_gene" + echo " type: file, output, file must exist" + echo " default: merged.gene_counts.tsv" + echo "" + echo " --counts_gene_length_scaled" + echo " type: file, output, file must exist" + echo " default: merged.gene_counts_length_scaled.tsv" + echo "" + echo " --counts_gene_scaled" + echo " type: file, output, file must exist" + echo " default: merged.gene_counts_scaled.tsv" + echo "" + echo " --lengths_gene" + echo " type: file, output, file must exist" + echo " default: merged.gene_lengths.tsv" + echo "" + echo " --tpm_transcript" + echo " type: file, output, file must exist" + echo " default: merged.transcript_tpm.tsv" + echo "" + echo " --counts_transcript" + echo " type: file, output, file must exist" + echo " default: merged.transcript_counts.tsv" + echo "" + echo " --lengths_transcript" + echo " type: file, output, file must exist" + echo " default: merged.transcript_lengths.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "tximport v0.3.0" + exit + ;; + --quant_results) + if [ -z "$VIASH_PAR_QUANT_RESULTS" ]; then + VIASH_PAR_QUANT_RESULTS="$2" + else + VIASH_PAR_QUANT_RESULTS="$VIASH_PAR_QUANT_RESULTS;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_results. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_results=*) + if [ -z "$VIASH_PAR_QUANT_RESULTS" ]; then + VIASH_PAR_QUANT_RESULTS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_QUANT_RESULTS="$VIASH_PAR_QUANT_RESULTS;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --tx2gene_tsv) + [ -n "$VIASH_PAR_TX2GENE_TSV" ] && ViashError Bad arguments for option \'--tx2gene_tsv\': \'$VIASH_PAR_TX2GENE_TSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TX2GENE_TSV="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tx2gene_tsv. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tx2gene_tsv=*) + [ -n "$VIASH_PAR_TX2GENE_TSV" ] && ViashError Bad arguments for option \'--tx2gene_tsv=*\': \'$VIASH_PAR_TX2GENE_TSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TX2GENE_TSV=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_type) + [ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_type=*) + [ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type=*\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_gene) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_gene=*) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene=*\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene=*) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_length_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_length_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lengths_gene) + [ -n "$VIASH_PAR_LENGTHS_GENE" ] && ViashError Bad arguments for option \'--lengths_gene\': \'$VIASH_PAR_LENGTHS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lengths_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lengths_gene=*) + [ -n "$VIASH_PAR_LENGTHS_GENE" ] && ViashError Bad arguments for option \'--lengths_gene=*\': \'$VIASH_PAR_LENGTHS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_transcript) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_transcript=*) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript=*\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_transcript) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_transcript=*) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lengths_transcript) + [ -n "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--lengths_transcript\': \'$VIASH_PAR_LENGTHS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lengths_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lengths_transcript=*) + [ -n "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--lengths_transcript=*\': \'$VIASH_PAR_LENGTHS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/tximport:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_TPM_GENE+x} ]; then + VIASH_PAR_TPM_GENE="merged.gene_tpm.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE+x} ]; then + VIASH_PAR_COUNTS_GENE="merged.gene_counts.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED+x} ]; then + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="merged.gene_counts_length_scaled.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE_SCALED+x} ]; then + VIASH_PAR_COUNTS_GENE_SCALED="merged.gene_counts_scaled.tsv" +fi +if [ -z ${VIASH_PAR_LENGTHS_GENE+x} ]; then + VIASH_PAR_LENGTHS_GENE="merged.gene_lengths.tsv" +fi +if [ -z ${VIASH_PAR_TPM_TRANSCRIPT+x} ]; then + VIASH_PAR_TPM_TRANSCRIPT="merged.transcript_tpm.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_TRANSCRIPT+x} ]; then + VIASH_PAR_COUNTS_TRANSCRIPT="merged.transcript_counts.tsv" +fi +if [ -z ${VIASH_PAR_LENGTHS_TRANSCRIPT+x} ]; then + VIASH_PAR_LENGTHS_TRANSCRIPT="merged.transcript_lengths.tsv" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_QUANT_RESULTS" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_QUANT_RESULTS; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_TX2GENE_TSV" ] && [ ! -e "$VIASH_PAR_TX2GENE_TSV" ]; then + ViashError "Input file '$VIASH_PAR_TX2GENE_TSV' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_QUANT_TYPE" ]; then + VIASH_PAR_QUANT_TYPE_CHOICES=("salmon;kallisto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_QUANT_TYPE_CHOICES[*]};" =~ ";$VIASH_PAR_QUANT_TYPE;" ]]; then + ViashError '--quant_type' specified value of \'$VIASH_PAR_QUANT_TYPE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" +fi +if [ ! -z "$VIASH_PAR_LENGTHS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_LENGTHS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_LENGTHS_GENE")" +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_LENGTHS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_LENGTHS_TRANSCRIPT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_QUANT_RESULTS" ]; then + VIASH_TEST_QUANT_RESULTS=() + IFS=';' + for var in $VIASH_PAR_QUANT_RESULTS; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_QUANT_RESULTS+=( "$var" ) + done + VIASH_PAR_QUANT_RESULTS=$(IFS=';' ; echo "${VIASH_TEST_QUANT_RESULTS[*]}") +fi +if [ ! -z "$VIASH_PAR_TX2GENE_TSV" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TX2GENE_TSV")" ) + VIASH_PAR_TX2GENE_TSV=$(ViashDockerAutodetectMount "$VIASH_PAR_TX2GENE_TSV") +fi +if [ ! -z "$VIASH_PAR_TPM_GENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TPM_GENE")" ) + VIASH_PAR_TPM_GENE=$(ViashDockerAutodetectMount "$VIASH_PAR_TPM_GENE") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_TPM_GENE" ) +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE")" ) + VIASH_PAR_COUNTS_GENE=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_COUNTS_GENE" ) +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" ) + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ) +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE_SCALED")" ) + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE_SCALED") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_COUNTS_GENE_SCALED" ) +fi +if [ ! -z "$VIASH_PAR_LENGTHS_GENE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_LENGTHS_GENE")" ) + VIASH_PAR_LENGTHS_GENE=$(ViashDockerAutodetectMount "$VIASH_PAR_LENGTHS_GENE") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_LENGTHS_GENE" ) +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TPM_TRANSCRIPT")" ) + VIASH_PAR_TPM_TRANSCRIPT=$(ViashDockerAutodetectMount "$VIASH_PAR_TPM_TRANSCRIPT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_TPM_TRANSCRIPT" ) +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_TRANSCRIPT")" ) + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_TRANSCRIPT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_COUNTS_TRANSCRIPT" ) +fi +if [ ! -z "$VIASH_PAR_LENGTHS_TRANSCRIPT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_LENGTHS_TRANSCRIPT")" ) + VIASH_PAR_LENGTHS_TRANSCRIPT=$(ViashDockerAutodetectMount "$VIASH_PAR_LENGTHS_TRANSCRIPT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_LENGTHS_TRANSCRIPT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-tximport-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS}" | sed "s#'#'\"'\"'#g;s#.*#par_quant_results='&'#" ; else echo "# par_quant_results="; fi ) +$( if [ ! -z ${VIASH_PAR_TX2GENE_TSV+x} ]; then echo "${VIASH_PAR_TX2GENE_TSV}" | sed "s#'#'\"'\"'#g;s#.*#par_tx2gene_tsv='&'#" ; else echo "# par_tx2gene_tsv="; fi ) +$( if [ ! -z ${VIASH_PAR_QUANT_TYPE+x} ]; then echo "${VIASH_PAR_QUANT_TYPE}" | sed "s#'#'\"'\"'#g;s#.*#par_quant_type='&'#" ; else echo "# par_quant_type="; fi ) +$( if [ ! -z ${VIASH_PAR_TPM_GENE+x} ]; then echo "${VIASH_PAR_TPM_GENE}" | sed "s#'#'\"'\"'#g;s#.*#par_tpm_gene='&'#" ; else echo "# par_tpm_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene_length_scaled='&'#" ; else echo "# par_counts_gene_length_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_SCALED}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene_scaled='&'#" ; else echo "# par_counts_gene_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTHS_GENE+x} ]; then echo "${VIASH_PAR_LENGTHS_GENE}" | sed "s#'#'\"'\"'#g;s#.*#par_lengths_gene='&'#" ; else echo "# par_lengths_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_TPM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_TPM_TRANSCRIPT}" | sed "s#'#'\"'\"'#g;s#.*#par_tpm_transcript='&'#" ; else echo "# par_tpm_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPT}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_transcript='&'#" ; else echo "# par_counts_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTHS_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_LENGTHS_TRANSCRIPT}" | sed "s#'#'\"'\"'#g;s#.*#par_lengths_transcript='&'#" ; else echo "# par_lengths_transcript="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +function clean_up { + rm -rf "\$tmpdir" +} +trap clean_up EXIT + +tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_name-XXXXXXXX") + +IFS=";" read -ra results <<< \$par_quant_results +for result in \${results[*]} +do + cp -r \$result \$tmpdir +done + +Rscript "\$meta_resources_dir/tximport.r" \\ + NULL \\ + \$tmpdir \\ + merged \\ + \$par_quant_type \\ + \$par_tx2gene_tsv +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_QUANT_RESULTS" ]; then + unset VIASH_TEST_QUANT_RESULTS + IFS=';' + for var in $VIASH_PAR_QUANT_RESULTS; do + unset IFS + if [ -z "$VIASH_TEST_QUANT_RESULTS" ]; then + VIASH_TEST_QUANT_RESULTS="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_QUANT_RESULTS="$VIASH_TEST_QUANT_RESULTS;""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_QUANT_RESULTS="$VIASH_TEST_QUANT_RESULTS" + fi + if [ ! -z "$VIASH_PAR_TX2GENE_TSV" ]; then + VIASH_PAR_TX2GENE_TSV=$(ViashDockerStripAutomount "$VIASH_PAR_TX2GENE_TSV") + fi + if [ ! -z "$VIASH_PAR_TPM_GENE" ]; then + VIASH_PAR_TPM_GENE=$(ViashDockerStripAutomount "$VIASH_PAR_TPM_GENE") + fi + if [ ! -z "$VIASH_PAR_COUNTS_GENE" ]; then + VIASH_PAR_COUNTS_GENE=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE") + fi + if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED") + fi + if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE_SCALED") + fi + if [ ! -z "$VIASH_PAR_LENGTHS_GENE" ]; then + VIASH_PAR_LENGTHS_GENE=$(ViashDockerStripAutomount "$VIASH_PAR_LENGTHS_GENE") + fi + if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + VIASH_PAR_TPM_TRANSCRIPT=$(ViashDockerStripAutomount "$VIASH_PAR_TPM_TRANSCRIPT") + fi + if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_TRANSCRIPT") + fi + if [ ! -z "$VIASH_PAR_LENGTHS_TRANSCRIPT" ]; then + VIASH_PAR_LENGTHS_TRANSCRIPT=$(ViashDockerStripAutomount "$VIASH_PAR_LENGTHS_TRANSCRIPT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -e "$VIASH_PAR_TPM_GENE" ]; then + ViashError "Output file '$VIASH_PAR_TPM_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_LENGTHS_GENE" ] && [ ! -e "$VIASH_PAR_LENGTHS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_LENGTHS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_TPM_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_LENGTHS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_LENGTHS_TRANSCRIPT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/tximport/tximport.r b/target/executable/tximport/tximport.r new file mode 100755 index 0000000..5036399 --- /dev/null +++ b/target/executable/tximport/tximport.r @@ -0,0 +1,142 @@ +#!/usr/bin/env Rscript + +# Script for importing and processing transcript-level quantifications. +# Written by Lorena Pantano, later modified by Jonathan Manning, and released under the MIT license. + +# Loading required libraries +library(SummarizedExperiment) +library(tximport) + +# Parsing command line arguments +args <- commandArgs(trailingOnly=TRUE) +if (length(args) < 4) { + stop("Usage: tximport.r ", + call.=FALSE) +} + +# Assigning command line arguments to variables +coldata_path <- args[1] +path <- args[2] +prefix <- args[3] +quant_type <- args[4] +tx2gene_path <- args[5] + +## Functions + +# Build a table from a SummarizedExperiment object +build_table <- function(se.obj, slot) { + cbind(rowData(se.obj)[,1:2], assays(se.obj)[[slot]]) +} + +# Write a table to a file with given parameters +write_se_table <- function(params) { + file_name <- paste0(prefix, ".", params$suffix) + write.table(build_table(params$obj, params$slot), file_name, + sep="\t", quote=FALSE, row.names = FALSE) +} + +# Read transcript metadata from a given path +read_transcript_info <- function(tinfo_path){ + info <- file.info(tinfo_path) + if (info$size == 0) { + stop("tx2gene file is empty") + } + + transcript_info <- read.csv(tinfo_path, sep="\t", header = FALSE, + col.names = c("tx", "gene_id", "gene_name")) + + extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]])) + transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra)) + transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ] + rownames(transcript_info) <- transcript_info[["tx"]] + + list(transcript = transcript_info, + gene = unique(transcript_info[,2:3]), + tx2gene = transcript_info[,1:2]) +} + +# Read and process sample/column data from a given path +read_coldata <- function(coldata_path){ + if (file.exists(coldata_path)) { + coldata <- read.csv(coldata_path, sep="\t") + coldata <- coldata[match(names, coldata[,1]),] + coldata <- cbind(files = fns, coldata) + } else { + message("ColData not available: ", coldata_path) + coldata <- data.frame(files = fns, names = names) + } + rownames(coldata) <- coldata[["names"]] +} + +# Create a SummarizedExperiment object with given data +create_summarized_experiment <- function(counts, abundance, length, col_data, row_data) { + SummarizedExperiment(assays = list(counts = counts, abundance = abundance, length = length), + colData = col_data, + rowData = row_data) +} + +# Main script starts here + +# Define pattern for file names based on quantification type +pattern <- ifelse(quant_type == "kallisto", "abundance.tsv", ".*quant_results\\.sf") +fns <- list.files(path, pattern = pattern, recursive = T, full.names = T) +names <- basename(fns) +names(fns) <- names +dropInfReps <- quant_type == "kallisto" + +# Import transcript-level quantifications +txi <- tximport(fns, type = quant_type, txOut = TRUE, dropInfReps = dropInfReps) + +# Read transcript and sample data +transcript_info <- read_transcript_info(tx2gene_path) +coldata <- read_coldata(coldata_path) + +# Create initial SummarizedExperiment object +se <- create_summarized_experiment(txi[["counts"]], txi[["abundance"]], txi[["length"]], + DataFrame(coldata), transcript_info$transcript) + +# Setting parameters for writing tables +params <- list( + list(obj = se, slot = "abundance", suffix = "transcript_tpm.tsv"), + list(obj = se, slot = "counts", suffix = "transcript_counts.tsv"), + list(obj = se, slot = "length", suffix = "transcript_lengths.tsv") +) + +# Process gene-level data if tx2gene mapping is available +if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) { + tx2gene <- transcript_info$tx2gene + gi <- summarizeToGene(txi, tx2gene = tx2gene) + gi.ls <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM") + gi.s <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "scaledTPM") + + gene_info <- transcript_info$gene[match(rownames(gi[[1]]), transcript_info$gene[["gene_id"]]),] + rownames(gene_info) <- gene_info[["tx"]] + + col_data_frame <- DataFrame(coldata) + + # Create gene-level SummarizedExperiment objects + gse <- create_summarized_experiment(gi[["counts"]], gi[["abundance"]], gi[["length"]], + col_data_frame, gene_info) + gse.ls <- create_summarized_experiment(gi.ls[["counts"]], gi.ls[["abundance"]], gi.ls[["length"]], + col_data_frame, gene_info) + gse.s <- create_summarized_experiment(gi.s[["counts"]], gi.s[["abundance"]], gi.s[["length"]], + col_data_frame, gene_info) + + params <- c(params, list( + list(obj = gse, slot = "length", suffix = "gene_lengths.tsv"), + list(obj = gse, slot = "abundance", suffix = "gene_tpm.tsv"), + list(obj = gse, slot = "counts", suffix = "gene_counts.tsv"), + list(obj = gse.ls, slot = "abundance", suffix = "gene_tpm_length_scaled.tsv"), + list(obj = gse.ls, slot = "counts", suffix = "gene_counts_length_scaled.tsv"), + list(obj = gse.s, slot = "abundance", suffix = "gene_tpm_scaled.tsv"), + list(obj = gse.s, slot = "counts", suffix = "gene_counts_scaled.tsv") + )) +} + +# Writing tables for each set of parameters +done <- lapply(params, write_se_table) + +# Output session information and citations +# Removed for now because the 'tximeta' package is not found sometimes +# citation("tximeta") +sessionInfo() \ No newline at end of file diff --git a/target/executable/ucsc/bedclip/.config.vsh.yaml b/target/executable/ucsc/bedclip/.config.vsh.yaml new file mode 100644 index 0000000..f69eba3 --- /dev/null +++ b/target/executable/ucsc/bedclip/.config.vsh.yaml @@ -0,0 +1,204 @@ +name: "bedclip" +namespace: "ucsc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input_bedgraph" + description: "bedGraph file which should be converted" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sizes" + description: "File with chromosome sizes" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_bedgraph" + description: "bedGraph file after clipping" + info: null + default: + - "$id.$key.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Remove lines from bed file that refer to off-chromosome locations" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.bedgraph" +- type: "file" + path: "genome.sizes" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/ucsc/bedclip/main.nf" + - "modules/nf-core/ucsc/bedclip/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "rsync" + - "libcurl4" + interactive: false + - type: "docker" + run: + - "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip\ + \ /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/ucsc/bedclip/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/ucsc/bedclip" + executable: "target/executable/ucsc/bedclip/bedclip" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/ucsc/bedclip/bedclip b/target/executable/ucsc/bedclip/bedclip new file mode 100755 index 0000000..b3781f0 --- /dev/null +++ b/target/executable/ucsc/bedclip/bedclip @@ -0,0 +1,1131 @@ +#!/usr/bin/env bash + +# bedclip v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="bedclip" +VIASH_META_FUNCTIONALITY_NAME="bedclip" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y rsync libcurl4 && \ + rm -rf /var/lib/apt/lists/* + +RUN rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip /usr/local/bin/ + +LABEL org.opencontainers.image.description="Companion container for running component ucsc bedclip" +LABEL org.opencontainers.image.created="2025-05-07T12:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedclip v0.3.0" + echo "" + echo "Remove lines from bed file that refer to off-chromosome locations" + echo "" + echo "Input:" + echo " --input_bedgraph" + echo " type: file, file must exist" + echo " bedGraph file which should be converted" + echo "" + echo " --sizes" + echo " type: file, file must exist" + echo " File with chromosome sizes" + echo "" + echo "Output:" + echo " --output_bedgraph" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.bedgraph" + echo " bedGraph file after clipping" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "bedclip v0.3.0" + exit + ;; + --input_bedgraph) + [ -n "$VIASH_PAR_INPUT_BEDGRAPH" ] && ViashError Bad arguments for option \'--input_bedgraph\': \'$VIASH_PAR_INPUT_BEDGRAPH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_BEDGRAPH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_bedgraph. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_bedgraph=*) + [ -n "$VIASH_PAR_INPUT_BEDGRAPH" ] && ViashError Bad arguments for option \'--input_bedgraph=*\': \'$VIASH_PAR_INPUT_BEDGRAPH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_BEDGRAPH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sizes) + [ -n "$VIASH_PAR_SIZES" ] && ViashError Bad arguments for option \'--sizes\': \'$VIASH_PAR_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIZES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sizes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sizes=*) + [ -n "$VIASH_PAR_SIZES" ] && ViashError Bad arguments for option \'--sizes=*\': \'$VIASH_PAR_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIZES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_bedgraph) + [ -n "$VIASH_PAR_OUTPUT_BEDGRAPH" ] && ViashError Bad arguments for option \'--output_bedgraph\': \'$VIASH_PAR_OUTPUT_BEDGRAPH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BEDGRAPH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bedgraph. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_bedgraph=*) + [ -n "$VIASH_PAR_OUTPUT_BEDGRAPH" ] && ViashError Bad arguments for option \'--output_bedgraph=*\': \'$VIASH_PAR_OUTPUT_BEDGRAPH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BEDGRAPH=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/ucsc/bedclip:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT_BEDGRAPH+x} ]; then + VIASH_PAR_OUTPUT_BEDGRAPH="\$id.\$key.bedgraph" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT_BEDGRAPH" ] && [ ! -e "$VIASH_PAR_INPUT_BEDGRAPH" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_BEDGRAPH' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SIZES" ] && [ ! -e "$VIASH_PAR_SIZES" ]; then + ViashError "Input file '$VIASH_PAR_SIZES' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_BEDGRAPH" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_BEDGRAPH")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BEDGRAPH")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT_BEDGRAPH" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_BEDGRAPH")" ) + VIASH_PAR_INPUT_BEDGRAPH=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_BEDGRAPH") +fi +if [ ! -z "$VIASH_PAR_SIZES" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_SIZES")" ) + VIASH_PAR_SIZES=$(ViashDockerAutodetectMount "$VIASH_PAR_SIZES") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BEDGRAPH" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_BEDGRAPH")" ) + VIASH_PAR_OUTPUT_BEDGRAPH=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_BEDGRAPH") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_BEDGRAPH" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bedclip-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT_BEDGRAPH+x} ]; then echo "${VIASH_PAR_INPUT_BEDGRAPH}" | sed "s#'#'\"'\"'#g;s#.*#par_input_bedgraph='&'#" ; else echo "# par_input_bedgraph="; fi ) +$( if [ ! -z ${VIASH_PAR_SIZES+x} ]; then echo "${VIASH_PAR_SIZES}" | sed "s#'#'\"'\"'#g;s#.*#par_sizes='&'#" ; else echo "# par_sizes="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_BEDGRAPH+x} ]; then echo "${VIASH_PAR_OUTPUT_BEDGRAPH}" | sed "s#'#'\"'\"'#g;s#.*#par_output_bedgraph='&'#" ; else echo "# par_output_bedgraph="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +bedClip \$par_input_bedgraph \$par_sizes \$par_output_bedgraph +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT_BEDGRAPH" ]; then + VIASH_PAR_INPUT_BEDGRAPH=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT_BEDGRAPH") + fi + if [ ! -z "$VIASH_PAR_SIZES" ]; then + VIASH_PAR_SIZES=$(ViashDockerStripAutomount "$VIASH_PAR_SIZES") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_BEDGRAPH" ]; then + VIASH_PAR_OUTPUT_BEDGRAPH=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_BEDGRAPH") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_BEDGRAPH" ] && [ ! -e "$VIASH_PAR_OUTPUT_BEDGRAPH" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_BEDGRAPH' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/ucsc/bedclip/nextflow_labels.config b/target/executable/ucsc/bedclip/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/ucsc/bedclip/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml b/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml new file mode 100644 index 0000000..061fab5 --- /dev/null +++ b/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml @@ -0,0 +1,204 @@ +name: "bedgraphtobigwig" +namespace: "ucsc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--bedgraph" + description: "bedGraph file which should be converted" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sizes" + description: "File with chromosome sizes" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--bigwig" + description: "bigWig coverage file relative to genes on the input file" + info: null + default: + - "$id.$key.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Convert a bedGraph file to bigWig format" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.bedgraph" +- type: "file" + path: "genome.sizes" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/ucsc/bedgraphtobigwig/main.nf" + - "modules/nf-core/ucsc/bedgraphtobigwig/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "rsync" + - "libcurl4" + interactive: false + - type: "docker" + run: + - "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig\ + \ /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/ucsc/bedgraphtobigwig/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/ucsc/bedgraphtobigwig" + executable: "target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig b/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig new file mode 100755 index 0000000..4724ab1 --- /dev/null +++ b/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig @@ -0,0 +1,1131 @@ +#!/usr/bin/env bash + +# bedgraphtobigwig v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="bedgraphtobigwig" +VIASH_META_FUNCTIONALITY_NAME="bedgraphtobigwig" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y rsync libcurl4 && \ + rm -rf /var/lib/apt/lists/* + +RUN rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/ + +LABEL org.opencontainers.image.description="Companion container for running component ucsc bedgraphtobigwig" +LABEL org.opencontainers.image.created="2025-05-07T12:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="e21130ff7a4a215d28ec1988730cbc95c0259076" +LABEL org.opencontainers.image.version="v0.3.0" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedgraphtobigwig v0.3.0" + echo "" + echo "Convert a bedGraph file to bigWig format" + echo "" + echo "Input:" + echo " --bedgraph" + echo " type: file, file must exist" + echo " bedGraph file which should be converted" + echo "" + echo " --sizes" + echo " type: file, file must exist" + echo " File with chromosome sizes" + echo "" + echo "Output:" + echo " --bigwig" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.bigwig" + echo " bigWig coverage file relative to genes on the input file" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "bedgraphtobigwig v0.3.0" + exit + ;; + --bedgraph) + [ -n "$VIASH_PAR_BEDGRAPH" ] && ViashError Bad arguments for option \'--bedgraph\': \'$VIASH_PAR_BEDGRAPH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bedgraph. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bedgraph=*) + [ -n "$VIASH_PAR_BEDGRAPH" ] && ViashError Bad arguments for option \'--bedgraph=*\': \'$VIASH_PAR_BEDGRAPH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sizes) + [ -n "$VIASH_PAR_SIZES" ] && ViashError Bad arguments for option \'--sizes\': \'$VIASH_PAR_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIZES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sizes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sizes=*) + [ -n "$VIASH_PAR_SIZES" ] && ViashError Bad arguments for option \'--sizes=*\': \'$VIASH_PAR_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIZES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bigwig) + [ -n "$VIASH_PAR_BIGWIG" ] && ViashError Bad arguments for option \'--bigwig\': \'$VIASH_PAR_BIGWIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bigwig. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bigwig=*) + [ -n "$VIASH_PAR_BIGWIG" ] && ViashError Bad arguments for option \'--bigwig=*\': \'$VIASH_PAR_BIGWIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/ucsc/bedgraphtobigwig:v0.3.0' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_BIGWIG+x} ]; then + VIASH_PAR_BIGWIG="\$id.\$key.bigwig" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BEDGRAPH" ] && [ ! -e "$VIASH_PAR_BEDGRAPH" ]; then + ViashError "Input file '$VIASH_PAR_BEDGRAPH' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SIZES" ] && [ ! -e "$VIASH_PAR_SIZES" ]; then + ViashError "Input file '$VIASH_PAR_SIZES' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_BIGWIG" ] && [ ! -d "$(dirname "$VIASH_PAR_BIGWIG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BIGWIG")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_BEDGRAPH" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BEDGRAPH")" ) + VIASH_PAR_BEDGRAPH=$(ViashDockerAutodetectMount "$VIASH_PAR_BEDGRAPH") +fi +if [ ! -z "$VIASH_PAR_SIZES" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_SIZES")" ) + VIASH_PAR_SIZES=$(ViashDockerAutodetectMount "$VIASH_PAR_SIZES") +fi +if [ ! -z "$VIASH_PAR_BIGWIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BIGWIG")" ) + VIASH_PAR_BIGWIG=$(ViashDockerAutodetectMount "$VIASH_PAR_BIGWIG") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BIGWIG" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bedgraphtobigwig-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BEDGRAPH+x} ]; then echo "${VIASH_PAR_BEDGRAPH}" | sed "s#'#'\"'\"'#g;s#.*#par_bedgraph='&'#" ; else echo "# par_bedgraph="; fi ) +$( if [ ! -z ${VIASH_PAR_SIZES+x} ]; then echo "${VIASH_PAR_SIZES}" | sed "s#'#'\"'\"'#g;s#.*#par_sizes='&'#" ; else echo "# par_sizes="; fi ) +$( if [ ! -z ${VIASH_PAR_BIGWIG+x} ]; then echo "${VIASH_PAR_BIGWIG}" | sed "s#'#'\"'\"'#g;s#.*#par_bigwig='&'#" ; else echo "# par_bigwig="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +bedGraphToBigWig \$par_bedgraph \$par_sizes \$par_bigwig +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_BEDGRAPH" ]; then + VIASH_PAR_BEDGRAPH=$(ViashDockerStripAutomount "$VIASH_PAR_BEDGRAPH") + fi + if [ ! -z "$VIASH_PAR_SIZES" ]; then + VIASH_PAR_SIZES=$(ViashDockerStripAutomount "$VIASH_PAR_SIZES") + fi + if [ ! -z "$VIASH_PAR_BIGWIG" ]; then + VIASH_PAR_BIGWIG=$(ViashDockerStripAutomount "$VIASH_PAR_BIGWIG") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BIGWIG" ] && [ ! -e "$VIASH_PAR_BIGWIG" ]; then + ViashError "Output file '$VIASH_PAR_BIGWIG' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/ucsc/bedgraphtobigwig/nextflow_labels.config b/target/executable/ucsc/bedgraphtobigwig/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/ucsc/bedgraphtobigwig/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml b/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml new file mode 100644 index 0000000..0bc1b07 --- /dev/null +++ b/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml @@ -0,0 +1,641 @@ +name: "genome_alignment_and_quant" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + alternatives: + - "-i" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, or\ + \ reverse" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Fasta file of the reference transcriptome." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_index" + description: "STAR index directory." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--star_ignore_sjdbgtf" + description: "When using pre-built STAR indices do not re-extract and use splice\ + \ junctions from the GTF file" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--star_sjdb_gtf_feature_exon" + description: "Feature type in GTF file to be used as exons for building transcripts" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--bam_csi_index" + description: "Create a CSI index for BAM files instead of the traditional BAI\ + \ index. This will be required for genomes with larger chromosome sizes." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--umi_dedup_stats" + description: "Generate output stats when running \"umi_tools dedup\"." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--salmon_quant_libtype" + description: "Override Salmon library type inferred based on strandedness defined\ + \ in meta object." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_salmon_quant_args" + description: "Extra arguments to pass to salmon quant command in addition to defaults\ + \ defined by the pipeline." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + description: "Define the attribute type used to group features in the GTF file\ + \ when running Salmon." + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + description: "By default, the pipeline uses the gene_name field to obtain additional\ + \ gene identifiers from the input GTF file when running Salmon." + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_rsem_calculate_expression_args" + description: "Extra arguments to pass to rsem-calculate-expression command in\ + \ addition to defaults defined by the pipeline." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--aligner" + description: "Specifies the alignment algorithm to use - available options are\ + \ 'star_salmon', 'star_rsem' and 'hisat2'." + info: null + default: + - "star_salmon" + required: false + choices: + - "star_salmon" + - "star_rsem" + - "hisat2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index" + description: "Path to directory for pre-built RSEM index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory for pre-built Salmon index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--star_multiqc" + info: null + default: + - "$id_star.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_sorted" + info: null + default: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + info: null + default: + - "$id.genome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + default: + - "$id.genome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + default: + - "$id.genome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + default: + - "$id.genome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam" + info: null + default: + - "$id.transcriptome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_index" + info: null + default: + - "$id.transcriptome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_stats" + info: null + default: + - "$id.transcriptome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_flagstat" + info: null + default: + - "$id.transcriptome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_idxstats" + info: null + default: + - "$id.transcriptome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_out_dir" + info: null + default: + - "$id.salmon_quant" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_results_file" + info: null + default: + - "$id.quant.sf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_gene" + description: "Expression counts on gene level" + info: null + default: + - "$id.genes.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcripts" + description: "Expression counts on transcript level" + info: null + default: + - "$id.isoforms.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_multiqc" + description: "RSEM statistics" + info: null + default: + - "$id.stat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_star_rsem" + description: "BAM file generated by STAR (optional)" + info: null + default: + - "$id.STAR.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_genome_rsem" + description: "Genome BAM file (optional)" + info: null + default: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_transcript_rsem" + description: "Transcript BAM file (optional)" + info: null + default: + - "$id.transcript.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash sub-workflow for genome alignment and quantification stage of\ + \ nf-core/rnaseq pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "star/star_align_reads" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_sort" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_stats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_flagstat" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_idxstats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "umi_tools/umi_tools_dedup" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "umi_tools/umi_tools_prepareforrsem" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "salmon/salmon_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rsem/rsem_calculate_expression" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/genome_alignment_and_quant/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/genome_alignment_and_quant" + executable: "target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant b/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant new file mode 100755 index 0000000..a36435e --- /dev/null +++ b/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant @@ -0,0 +1,1851 @@ +#!/usr/bin/env bash + +# genome_alignment_and_quant v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="genome_alignment_and_quant" +VIASH_META_FUNCTIONALITY_NAME="genome_alignment_and_quant" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "genome_alignment_and_quant v0.3.0" + echo "" + echo "A viash sub-workflow for genome alignment and quantification stage of" + echo "nf-core/rnaseq pipeline." + echo "" + echo "Input:" + echo " --id" + echo " type: string, required parameter" + echo " example: foo" + echo " ID of the sample." + echo "" + echo " -i, --fastq_1" + echo " type: file, required parameter, file must exist" + echo " example: input.fastq.gz" + echo " Path to the sample (or read 1 of paired end sample)." + echo "" + echo " --fastq_2" + echo " type: file, file must exist" + echo " Path to read 2 of the sample." + echo "" + echo " --strandedness" + echo " type: string" + echo " choices: [ forward, reverse, unstranded ]" + echo " Sample strand-specificity. Must be one of unstranded, forward, or" + echo " reverse" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " GTF file" + echo "" + echo " --transcript_fasta" + echo " type: file, file must exist" + echo " Fasta file of the reference transcriptome." + echo "" + echo " --star_index" + echo " type: file, file must exist" + echo " STAR index directory." + echo "" + echo " --star_ignore_sjdbgtf" + echo " type: boolean" + echo " default: false" + echo " When using pre-built STAR indices do not re-extract and use splice" + echo " junctions from the GTF file" + echo "" + echo " --star_sjdb_gtf_feature_exon" + echo " type: string" + echo " Feature type in GTF file to be used as exons for building transcripts" + echo "" + echo " --bam_csi_index" + echo " type: boolean" + echo " default: false" + echo " Create a CSI index for BAM files instead of the traditional BAI index." + echo " This will be required for genomes with larger chromosome sizes." + echo "" + echo " --umi_dedup_stats" + echo " type: boolean" + echo " default: false" + echo " Generate output stats when running \"umi_tools dedup\"." + echo "" + echo " --with_umi" + echo " type: boolean" + echo " default: false" + echo " Enable UMI-based read deduplication." + echo "" + echo " --salmon_quant_libtype" + echo " type: string" + echo " Override Salmon library type inferred based on strandedness defined in" + echo " meta object." + echo "" + echo " --extra_salmon_quant_args" + echo " type: string" + echo " default:" + echo " Extra arguments to pass to salmon quant command in addition to defaults" + echo " defined by the pipeline." + echo "" + echo " --gtf_group_features" + echo " type: string" + echo " default: gene_id" + echo " Define the attribute type used to group features in the GTF file when" + echo " running Salmon." + echo "" + echo " --gtf_extra_attributes" + echo " type: string" + echo " default: gene_name" + echo " By default, the pipeline uses the gene_name field to obtain additional" + echo " gene identifiers from the input GTF file when running Salmon." + echo "" + echo " --extra_rsem_calculate_expression_args" + echo " type: string" + echo " Extra arguments to pass to rsem-calculate-expression command in addition" + echo " to defaults defined by the pipeline." + echo "" + echo " --aligner" + echo " type: string" + echo " default: star_salmon" + echo " choices: [ star_salmon, star_rsem, hisat2 ]" + echo " Specifies the alignment algorithm to use - available options are" + echo " 'star_salmon', 'star_rsem' and 'hisat2'." + echo "" + echo " --rsem_index" + echo " type: file, file must exist" + echo " Path to directory for pre-built RSEM index." + echo "" + echo " --salmon_index" + echo " type: file, file must exist" + echo " Path to directory for pre-built Salmon index." + echo "" + echo "Output:" + echo " --star_multiqc" + echo " type: file, output, file must exist" + echo " default: \$id_star.log" + echo "" + echo " --genome_bam_sorted" + echo " type: file, output, file must exist" + echo " default: \$id.genome.bam" + echo "" + echo " --genome_bam_index" + echo " type: file, output, file must exist" + echo " default: \$id.genome.bam.bai" + echo "" + echo " --genome_bam_stats" + echo " type: file, output, file must exist" + echo " default: \$id.genome.stats" + echo "" + echo " --genome_bam_flagstat" + echo " type: file, output, file must exist" + echo " default: \$id.genome.flagstat" + echo "" + echo " --genome_bam_idxstats" + echo " type: file, output, file must exist" + echo " default: \$id.genome.idxstats" + echo "" + echo " --transcriptome_bam" + echo " type: file, output, file must exist" + echo " default: \$id.transcriptome.bam" + echo "" + echo " --transcriptome_bam_index" + echo " type: file, output, file must exist" + echo " default: \$id.transcriptome.bam.bai" + echo "" + echo " --transcriptome_bam_stats" + echo " type: file, output, file must exist" + echo " default: \$id.transcriptome.stats" + echo "" + echo " --transcriptome_bam_flagstat" + echo " type: file, output, file must exist" + echo " default: \$id.transcriptome.flagstat" + echo "" + echo " --transcriptome_bam_idxstats" + echo " type: file, output, file must exist" + echo " default: \$id.transcriptome.idxstats" + echo "" + echo " --quant_out_dir" + echo " type: file, output, file must exist" + echo " default: \$id.salmon_quant" + echo "" + echo " --quant_results_file" + echo " type: file, output, file must exist" + echo " default: \$id.quant.sf" + echo "" + echo " --salmon_multiqc" + echo " type: file, output, file must exist" + echo "" + echo " --rsem_counts_gene" + echo " type: file, output, file must exist" + echo " default: \$id.genes.results" + echo " Expression counts on gene level" + echo "" + echo " --counts_transcripts" + echo " type: file, output, file must exist" + echo " default: \$id.isoforms.results" + echo " Expression counts on transcript level" + echo "" + echo " --rsem_multiqc" + echo " type: file, output, file must exist" + echo " default: \$id.stat" + echo " RSEM statistics" + echo "" + echo " --bam_star_rsem" + echo " type: file, output, file must exist" + echo " default: \$id.STAR.genome.bam" + echo " BAM file generated by STAR (optional)" + echo "" + echo " --bam_genome_rsem" + echo " type: file, output, file must exist" + echo " default: \$id.genome.bam" + echo " Genome BAM file (optional)" + echo "" + echo " --bam_transcript_rsem" + echo " type: file, output, file must exist" + echo " default: \$id.transcript.bam" + echo " Transcript BAM file (optional)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "genome_alignment_and_quant v0.3.0" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_1) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_1=*) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1=*\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2=*) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2=*\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcript_fasta) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_fasta=*) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta=*\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_index) + [ -n "$VIASH_PAR_STAR_INDEX" ] && ViashError Bad arguments for option \'--star_index\': \'$VIASH_PAR_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_index=*) + [ -n "$VIASH_PAR_STAR_INDEX" ] && ViashError Bad arguments for option \'--star_index=*\': \'$VIASH_PAR_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_ignore_sjdbgtf) + [ -n "$VIASH_PAR_STAR_IGNORE_SJDBGTF" ] && ViashError Bad arguments for option \'--star_ignore_sjdbgtf\': \'$VIASH_PAR_STAR_IGNORE_SJDBGTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_IGNORE_SJDBGTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_ignore_sjdbgtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_ignore_sjdbgtf=*) + [ -n "$VIASH_PAR_STAR_IGNORE_SJDBGTF" ] && ViashError Bad arguments for option \'--star_ignore_sjdbgtf=*\': \'$VIASH_PAR_STAR_IGNORE_SJDBGTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_IGNORE_SJDBGTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_sjdb_gtf_feature_exon) + [ -n "$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON" ] && ViashError Bad arguments for option \'--star_sjdb_gtf_feature_exon\': \'$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_sjdb_gtf_feature_exon. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_sjdb_gtf_feature_exon=*) + [ -n "$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON" ] && ViashError Bad arguments for option \'--star_sjdb_gtf_feature_exon=*\': \'$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_csi_index) + [ -n "$VIASH_PAR_BAM_CSI_INDEX" ] && ViashError Bad arguments for option \'--bam_csi_index\': \'$VIASH_PAR_BAM_CSI_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_CSI_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_csi_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_csi_index=*) + [ -n "$VIASH_PAR_BAM_CSI_INDEX" ] && ViashError Bad arguments for option \'--bam_csi_index=*\': \'$VIASH_PAR_BAM_CSI_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_CSI_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umi_dedup_stats) + [ -n "$VIASH_PAR_UMI_DEDUP_STATS" ] && ViashError Bad arguments for option \'--umi_dedup_stats\': \'$VIASH_PAR_UMI_DEDUP_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMI_DEDUP_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umi_dedup_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umi_dedup_stats=*) + [ -n "$VIASH_PAR_UMI_DEDUP_STATS" ] && ViashError Bad arguments for option \'--umi_dedup_stats=*\': \'$VIASH_PAR_UMI_DEDUP_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMI_DEDUP_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --with_umi) + [ -n "$VIASH_PAR_WITH_UMI" ] && ViashError Bad arguments for option \'--with_umi\': \'$VIASH_PAR_WITH_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITH_UMI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --with_umi. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --with_umi=*) + [ -n "$VIASH_PAR_WITH_UMI" ] && ViashError Bad arguments for option \'--with_umi=*\': \'$VIASH_PAR_WITH_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITH_UMI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_quant_libtype) + [ -n "$VIASH_PAR_SALMON_QUANT_LIBTYPE" ] && ViashError Bad arguments for option \'--salmon_quant_libtype\': \'$VIASH_PAR_SALMON_QUANT_LIBTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_LIBTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_libtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_quant_libtype=*) + [ -n "$VIASH_PAR_SALMON_QUANT_LIBTYPE" ] && ViashError Bad arguments for option \'--salmon_quant_libtype=*\': \'$VIASH_PAR_SALMON_QUANT_LIBTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_LIBTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_salmon_quant_args) + [ -n "$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS" ] && ViashError Bad arguments for option \'--extra_salmon_quant_args\': \'$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_SALMON_QUANT_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_salmon_quant_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_salmon_quant_args=*) + [ -n "$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS" ] && ViashError Bad arguments for option \'--extra_salmon_quant_args=*\': \'$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_SALMON_QUANT_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_group_features) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_group_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_group_features=*) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features=*\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_extra_attributes) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_extra_attributes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_extra_attributes=*) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes=*\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_rsem_calculate_expression_args) + [ -n "$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS" ] && ViashError Bad arguments for option \'--extra_rsem_calculate_expression_args\': \'$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_rsem_calculate_expression_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_rsem_calculate_expression_args=*) + [ -n "$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS" ] && ViashError Bad arguments for option \'--extra_rsem_calculate_expression_args=*\': \'$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --aligner) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --aligner=*) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner=*\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_index) + [ -n "$VIASH_PAR_RSEM_INDEX" ] && ViashError Bad arguments for option \'--rsem_index\': \'$VIASH_PAR_RSEM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_index=*) + [ -n "$VIASH_PAR_RSEM_INDEX" ] && ViashError Bad arguments for option \'--rsem_index=*\': \'$VIASH_PAR_RSEM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_index) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_index=*) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index=*\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_multiqc) + [ -n "$VIASH_PAR_STAR_MULTIQC" ] && ViashError Bad arguments for option \'--star_multiqc\': \'$VIASH_PAR_STAR_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_multiqc=*) + [ -n "$VIASH_PAR_STAR_MULTIQC" ] && ViashError Bad arguments for option \'--star_multiqc=*\': \'$VIASH_PAR_STAR_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_sorted) + [ -n "$VIASH_PAR_GENOME_BAM_SORTED" ] && ViashError Bad arguments for option \'--genome_bam_sorted\': \'$VIASH_PAR_GENOME_BAM_SORTED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_SORTED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_sorted. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_sorted=*) + [ -n "$VIASH_PAR_GENOME_BAM_SORTED" ] && ViashError Bad arguments for option \'--genome_bam_sorted=*\': \'$VIASH_PAR_GENOME_BAM_SORTED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_SORTED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_index) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_index=*) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index=*\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_stats) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_stats=*) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats=*\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_flagstat) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_flagstat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_flagstat=*) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat=*\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_idxstats) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_idxstats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_idxstats=*) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats=*\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && ViashError Bad arguments for option \'--transcriptome_bam\': \'$VIASH_PAR_TRANSCRIPTOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && ViashError Bad arguments for option \'--transcriptome_bam=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_index) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--transcriptome_bam_index\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_index=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--transcriptome_bam_index=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_stats) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_stats\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_stats=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_stats=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_flagstat) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--transcriptome_bam_flagstat\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_flagstat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_flagstat=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--transcriptome_bam_flagstat=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_idxstats) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_idxstats\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_idxstats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_idxstats=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_idxstats=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_out_dir) + [ -n "$VIASH_PAR_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--quant_out_dir\': \'$VIASH_PAR_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_OUT_DIR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_out_dir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_out_dir=*) + [ -n "$VIASH_PAR_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--quant_out_dir=*\': \'$VIASH_PAR_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_OUT_DIR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_results_file) + [ -n "$VIASH_PAR_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--quant_results_file\': \'$VIASH_PAR_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_RESULTS_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_results_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_results_file=*) + [ -n "$VIASH_PAR_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--quant_results_file=*\': \'$VIASH_PAR_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_multiqc) + [ -n "$VIASH_PAR_SALMON_MULTIQC" ] && ViashError Bad arguments for option \'--salmon_multiqc\': \'$VIASH_PAR_SALMON_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_multiqc=*) + [ -n "$VIASH_PAR_SALMON_MULTIQC" ] && ViashError Bad arguments for option \'--salmon_multiqc=*\': \'$VIASH_PAR_SALMON_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_counts_gene) + [ -n "$VIASH_PAR_RSEM_COUNTS_GENE" ] && ViashError Bad arguments for option \'--rsem_counts_gene\': \'$VIASH_PAR_RSEM_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_counts_gene=*) + [ -n "$VIASH_PAR_RSEM_COUNTS_GENE" ] && ViashError Bad arguments for option \'--rsem_counts_gene=*\': \'$VIASH_PAR_RSEM_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_transcripts) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--counts_transcripts\': \'$VIASH_PAR_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcripts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_transcripts=*) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--counts_transcripts=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_multiqc) + [ -n "$VIASH_PAR_RSEM_MULTIQC" ] && ViashError Bad arguments for option \'--rsem_multiqc\': \'$VIASH_PAR_RSEM_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_multiqc=*) + [ -n "$VIASH_PAR_RSEM_MULTIQC" ] && ViashError Bad arguments for option \'--rsem_multiqc=*\': \'$VIASH_PAR_RSEM_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_star_rsem) + [ -n "$VIASH_PAR_BAM_STAR_RSEM" ] && ViashError Bad arguments for option \'--bam_star_rsem\': \'$VIASH_PAR_BAM_STAR_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_STAR_RSEM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_star_rsem. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_star_rsem=*) + [ -n "$VIASH_PAR_BAM_STAR_RSEM" ] && ViashError Bad arguments for option \'--bam_star_rsem=*\': \'$VIASH_PAR_BAM_STAR_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_STAR_RSEM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_genome_rsem) + [ -n "$VIASH_PAR_BAM_GENOME_RSEM" ] && ViashError Bad arguments for option \'--bam_genome_rsem\': \'$VIASH_PAR_BAM_GENOME_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_GENOME_RSEM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_genome_rsem. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_genome_rsem=*) + [ -n "$VIASH_PAR_BAM_GENOME_RSEM" ] && ViashError Bad arguments for option \'--bam_genome_rsem=*\': \'$VIASH_PAR_BAM_GENOME_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_GENOME_RSEM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_transcript_rsem) + [ -n "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && ViashError Bad arguments for option \'--bam_transcript_rsem\': \'$VIASH_PAR_BAM_TRANSCRIPT_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_TRANSCRIPT_RSEM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_transcript_rsem. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_transcript_rsem=*) + [ -n "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && ViashError Bad arguments for option \'--bam_transcript_rsem=*\': \'$VIASH_PAR_BAM_TRANSCRIPT_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_TRANSCRIPT_RSEM=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_ID+x} ]; then + ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then + ViashError '--fastq_1' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_STAR_IGNORE_SJDBGTF+x} ]; then + VIASH_PAR_STAR_IGNORE_SJDBGTF="false" +fi +if [ -z ${VIASH_PAR_BAM_CSI_INDEX+x} ]; then + VIASH_PAR_BAM_CSI_INDEX="false" +fi +if [ -z ${VIASH_PAR_UMI_DEDUP_STATS+x} ]; then + VIASH_PAR_UMI_DEDUP_STATS="false" +fi +if [ -z ${VIASH_PAR_WITH_UMI+x} ]; then + VIASH_PAR_WITH_UMI="false" +fi +if [ -z ${VIASH_PAR_EXTRA_SALMON_QUANT_ARGS+x} ]; then + VIASH_PAR_EXTRA_SALMON_QUANT_ARGS="" +fi +if [ -z ${VIASH_PAR_GTF_GROUP_FEATURES+x} ]; then + VIASH_PAR_GTF_GROUP_FEATURES="gene_id" +fi +if [ -z ${VIASH_PAR_GTF_EXTRA_ATTRIBUTES+x} ]; then + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="gene_name" +fi +if [ -z ${VIASH_PAR_ALIGNER+x} ]; then + VIASH_PAR_ALIGNER="star_salmon" +fi +if [ -z ${VIASH_PAR_STAR_MULTIQC+x} ]; then + VIASH_PAR_STAR_MULTIQC="\$id_star.log" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_SORTED+x} ]; then + VIASH_PAR_GENOME_BAM_SORTED="\$id.genome.bam" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_INDEX+x} ]; then + VIASH_PAR_GENOME_BAM_INDEX="\$id.genome.bam.bai" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_STATS+x} ]; then + VIASH_PAR_GENOME_BAM_STATS="\$id.genome.stats" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_FLAGSTAT+x} ]; then + VIASH_PAR_GENOME_BAM_FLAGSTAT="\$id.genome.flagstat" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_IDXSTATS+x} ]; then + VIASH_PAR_GENOME_BAM_IDXSTATS="\$id.genome.idxstats" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM="\$id.transcriptome.bam" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_INDEX+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_INDEX="\$id.transcriptome.bam.bai" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_STATS+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_STATS="\$id.transcriptome.stats" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT="\$id.transcriptome.flagstat" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS="\$id.transcriptome.idxstats" +fi +if [ -z ${VIASH_PAR_QUANT_OUT_DIR+x} ]; then + VIASH_PAR_QUANT_OUT_DIR="\$id.salmon_quant" +fi +if [ -z ${VIASH_PAR_QUANT_RESULTS_FILE+x} ]; then + VIASH_PAR_QUANT_RESULTS_FILE="\$id.quant.sf" +fi +if [ -z ${VIASH_PAR_RSEM_COUNTS_GENE+x} ]; then + VIASH_PAR_RSEM_COUNTS_GENE="\$id.genes.results" +fi +if [ -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then + VIASH_PAR_COUNTS_TRANSCRIPTS="\$id.isoforms.results" +fi +if [ -z ${VIASH_PAR_RSEM_MULTIQC+x} ]; then + VIASH_PAR_RSEM_MULTIQC="\$id.stat" +fi +if [ -z ${VIASH_PAR_BAM_STAR_RSEM+x} ]; then + VIASH_PAR_BAM_STAR_RSEM="\$id.STAR.genome.bam" +fi +if [ -z ${VIASH_PAR_BAM_GENOME_RSEM+x} ]; then + VIASH_PAR_BAM_GENOME_RSEM="\$id.genome.bam" +fi +if [ -z ${VIASH_PAR_BAM_TRANSCRIPT_RSEM+x} ]; then + VIASH_PAR_BAM_TRANSCRIPT_RSEM="\$id.transcript.bam" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -e "$VIASH_PAR_FASTQ_1" ]; then + ViashError "Input file '$VIASH_PAR_FASTQ_1' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FASTQ_2" ] && [ ! -e "$VIASH_PAR_FASTQ_2" ]; then + ViashError "Input file '$VIASH_PAR_FASTQ_2' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STAR_INDEX" ] && [ ! -e "$VIASH_PAR_STAR_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_STAR_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_INDEX" ] && [ ! -e "$VIASH_PAR_RSEM_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_RSEM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_INDEX" ] && [ ! -e "$VIASH_PAR_SALMON_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_SALMON_INDEX' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_STAR_IGNORE_SJDBGTF" ]]; then + if ! [[ "$VIASH_PAR_STAR_IGNORE_SJDBGTF" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--star_ignore_sjdbgtf' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_BAM_CSI_INDEX" ]]; then + if ! [[ "$VIASH_PAR_BAM_CSI_INDEX" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--bam_csi_index' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_UMI_DEDUP_STATS" ]]; then + if ! [[ "$VIASH_PAR_UMI_DEDUP_STATS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--umi_dedup_stats' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WITH_UMI" ]]; then + if ! [[ "$VIASH_PAR_WITH_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--with_umi' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_STRANDEDNESS" ]; then + VIASH_PAR_STRANDEDNESS_CHOICES=("forward;reverse;unstranded") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_STRANDEDNESS_CHOICES[*]};" =~ ";$VIASH_PAR_STRANDEDNESS;" ]]; then + ViashError '--strandedness' specified value of \'$VIASH_PAR_STRANDEDNESS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_ALIGNER" ]; then + VIASH_PAR_ALIGNER_CHOICES=("star_salmon;star_rsem;hisat2") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_ALIGNER_CHOICES[*]};" =~ ";$VIASH_PAR_ALIGNER;" ]]; then + ViashError '--aligner' specified value of \'$VIASH_PAR_ALIGNER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_STAR_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_STAR_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STAR_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_SORTED" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_SORTED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_SORTED")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_INDEX")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_STATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_STATS")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_FLAGSTAT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_FLAGSTAT")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_IDXSTATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_IDXSTATS")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS")" +fi +if [ ! -z "$VIASH_PAR_QUANT_OUT_DIR" ] && [ ! -d "$(dirname "$VIASH_PAR_QUANT_OUT_DIR")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUANT_OUT_DIR")" +fi +if [ ! -z "$VIASH_PAR_QUANT_RESULTS_FILE" ] && [ ! -d "$(dirname "$VIASH_PAR_QUANT_RESULTS_FILE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUANT_RESULTS_FILE")" +fi +if [ ! -z "$VIASH_PAR_SALMON_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_SALMON_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SALMON_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_RSEM_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_RSEM_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPTS")" +fi +if [ ! -z "$VIASH_PAR_RSEM_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_RSEM_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_RSEM_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_BAM_STAR_RSEM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_STAR_RSEM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAM_STAR_RSEM")" +fi +if [ ! -z "$VIASH_PAR_BAM_GENOME_RSEM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_GENOME_RSEM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAM_GENOME_RSEM")" +fi +if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_TRANSCRIPT_RSEM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAM_TRANSCRIPT_RSEM")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_STAR_STAR_ALIGN_READS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_SORT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_STATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_FLAGSTAT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_IDXSTATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf" +VIASH_DEP_UMI_TOOLS_UMI_TOOLS_DEDUP="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/main.nf" +VIASH_DEP_UMI_TOOLS_UMI_TOOLS_PREPAREFORRSEM="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" +VIASH_DEP_SALMON_SALMON_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf" +VIASH_DEP_RSEM_RSEM_CALCULATE_EXPRESSION="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-genome_alignment_and_quant-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired, input: input ] ] + } + + | star_align_reads.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "fastq_1", + "input_r2": "fastq_2", + "genome_dir": "star_index", + "sjdb_gtf_file": "gtf", + "out_sam_attr_rg_line": "star_sam_attr_rg_line", + "sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ + "genome_bam": "aligned_reads", + "transcriptome_bam": "reads_aligned_to_transcriptome", + "star_multiqc": "log" + ], + args: [ + quant_mode: "TranscriptomeSAM", + twopass_mode: "Basic", + out_sam_type: "BAM;Unsorted", + run_rng_seed: 0, + out_filter_multimap_nmax: 20, + align_sjdb_overhang_min: 1, + out_sam_attributes: "NH;HI;AS;NM;MD", + quant_transcriptome_sam_output: "BanSingleEnd" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // GENOME BAM + | samtools_sort.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: ["input": "genome_bam"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // + // Remove duplicate reads from BAM file based on UMIs + // + + // Deduplicate genome BAM file + | umi_tools_dedup.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.genome_bam, + bai: state.genome_bam_index, + output_stats: output_stats] + }, + toState: [ "genome_bam_sorted": "output" ], + key: "genome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_deduped_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_deduped_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta", + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_deduped_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Deduplicate transcriptome BAM file + + | samtools_sort.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index", + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + | umi_tools_dedup.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.transcriptome_bam, + bai: state.transcriptome_bam_index, + output_stats: output_stats] + }, + toState: [ "transcriptome_bam_deduped": "output" ], + key: "transcriptome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_sort.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam_deduped" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_deduped_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_deduped_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_deduped_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_deduped_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_deduped_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Fix paired-end reads in name sorted BAM file + | umi_tools_prepareforrsem.run( + runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + // Count reads from BAM alignments using Salmon + | salmon_quant.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "lib_type": "lib_type", + "alignments": "transcriptome_bam", + "targets": "transcript_fasta", + "gene_map": "gtf" + ], + toState: [ + "quant_out_dir": "output", + "quant_results_file": "quant_results" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | rsem_calculate_expression.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "id": "id", + "strandedness": "strandedness", + "paired": "paired", + "input": "input", + "index": "rsem_index", + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts", + "stat": "rsem_multiqc", + "logs": "star_multiqc", + "bam_star": "bam_star_rsem", + "bam_genome": "bam_genome_rsem", + "bam_transcript": "bam_transcript_rsem" + ], + args: [ + star: true, + star_output_genome_bam: true, + star_gzipped_read_file: true, + estimate_rspd: true, + seed: 1 + ], + toState: [ + "rsem_counts_gene": "counts_gene", + "rsem_counts_transcripts": "counts_transcripts", + "rsem_multiqc": "stat", + "star_multiqc": "logs", + "bam_star_rsem": "bam_star", + "bam_genome_rsem": "bam_genome", + "bam_transcript_rsem": "bam_transcript" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // RSEM_Star BAM + | samtools_sort.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: ["input": "bam_star_rsem"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ "star_multiqc": "star_multiqc", + "rsem_multiqc": "rsem_multiqc", + "salmon_multiqc": "salmon_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "quant_out_dir": "quant_out_dir", + "quant_results_file": "quant_results_file", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "bam_genome_rsem": "bam_genome_rsem", + "bam_transcript_rsem": "bam_transcript_rsem" + ] + ) + + emit: + output_ch +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_STAR_MULTIQC" ] && [ ! -e "$VIASH_PAR_STAR_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_STAR_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_SORTED" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_SORTED" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_SORTED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_INDEX" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_STATS" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_STATS" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_STATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_FLAGSTAT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_IDXSTATS" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_IDXSTATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_STATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_OUT_DIR" ] && [ ! -e "$VIASH_PAR_QUANT_OUT_DIR" ]; then + ViashError "Output file '$VIASH_PAR_QUANT_OUT_DIR' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_QUANT_RESULTS_FILE" ]; then + ViashError "Output file '$VIASH_PAR_QUANT_RESULTS_FILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_MULTIQC" ] && [ ! -e "$VIASH_PAR_SALMON_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_SALMON_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_RSEM_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_RSEM_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPTS" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_TRANSCRIPTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_MULTIQC" ] && [ ! -e "$VIASH_PAR_RSEM_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_RSEM_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAM_STAR_RSEM" ] && [ ! -e "$VIASH_PAR_BAM_STAR_RSEM" ]; then + ViashError "Output file '$VIASH_PAR_BAM_STAR_RSEM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAM_GENOME_RSEM" ] && [ ! -e "$VIASH_PAR_BAM_GENOME_RSEM" ]; then + ViashError "Output file '$VIASH_PAR_BAM_GENOME_RSEM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && [ ! -e "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ]; then + ViashError "Output file '$VIASH_PAR_BAM_TRANSCRIPT_RSEM' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/workflows/genome_alignment_and_quant/nextflow_labels.config b/target/executable/workflows/genome_alignment_and_quant/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/genome_alignment_and_quant/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/merge_quant_results/.config.vsh.yaml b/target/executable/workflows/merge_quant_results/.config.vsh.yaml new file mode 100644 index 0000000..46d663d --- /dev/null +++ b/target/executable/workflows/merge_quant_results/.config.vsh.yaml @@ -0,0 +1,322 @@ +name: "merge_quant_results" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--salmon_quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--kallisto_quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--gtf" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quant_type" + description: "Quantification method used." + info: null + default: + - "salmon" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--versions" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--tpm_gene" + info: null + example: + - "gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + example: + - "gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + example: + - "gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + example: + - "gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + example: + - "transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_gene" + info: null + example: + - "gene_length.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + example: + - "transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_transcript" + info: null + example: + - "transcript_length.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_merged_summarizedexperiment" + info: null + example: + - "quant_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A sub-workflow to merge the counts obtained from salmon quant across\ + \ all samples." +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "tx2gene" + repository: + type: "local" +- name: "tximport" + repository: + type: "local" +- name: "summarizedexperiment" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/merge_quant_results/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/merge_quant_results" + executable: "target/executable/workflows/merge_quant_results/merge_quant_results" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/tx2gene" + - "target/nextflow/tximport" + - "target/nextflow/summarizedexperiment" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/merge_quant_results/merge_quant_results b/target/executable/workflows/merge_quant_results/merge_quant_results new file mode 100755 index 0000000..fe44d92 --- /dev/null +++ b/target/executable/workflows/merge_quant_results/merge_quant_results @@ -0,0 +1,905 @@ +#!/usr/bin/env bash + +# merge_quant_results v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="merge_quant_results" +VIASH_META_FUNCTIONALITY_NAME="merge_quant_results" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "merge_quant_results v0.3.0" + echo "" + echo "A sub-workflow to merge the counts obtained from salmon quant across all" + echo "samples." + echo "" + echo "Input:" + echo " --salmon_quant_results" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --kallisto_quant_results" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo "" + echo " --gtf_extra_attributes" + echo " type: string" + echo " default: gene_name" + echo "" + echo " --gtf_group_features" + echo " type: string" + echo " default: gene_id" + echo "" + echo " --quant_type" + echo " type: string" + echo " default: salmon" + echo " choices: [ salmon, kallisto ]" + echo " Quantification method used." + echo "" + echo " --versions" + echo " type: file" + echo "" + echo "Output:" + echo " --tpm_gene" + echo " type: file, output, file must exist" + echo " example: gene_tpm.tsv" + echo "" + echo " --counts_gene" + echo " type: file, output, file must exist" + echo " example: gene_counts.tsv" + echo "" + echo " --counts_gene_length_scaled" + echo " type: file, output, file must exist" + echo " example: gene_counts_length_scaled.tsv" + echo "" + echo " --counts_gene_scaled" + echo " type: file, output, file must exist" + echo " example: gene_counts_scaled.tsv" + echo "" + echo " --tpm_transcript" + echo " type: file, output, file must exist" + echo " example: transcript_tpm.tsv" + echo "" + echo " --lengths_gene" + echo " type: file, output, file must exist" + echo " example: gene_length.tsv" + echo "" + echo " --counts_transcript" + echo " type: file, output, file must exist" + echo " example: transcript_counts.tsv" + echo "" + echo " --lengths_transcript" + echo " type: file, output, file must exist" + echo " example: transcript_length.tsv" + echo "" + echo " --quant_merged_summarizedexperiment" + echo " type: file, output, file must exist" + echo " example: quant_merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "merge_quant_results v0.3.0" + exit + ;; + --salmon_quant_results) + if [ -z "$VIASH_PAR_SALMON_QUANT_RESULTS" ]; then + VIASH_PAR_SALMON_QUANT_RESULTS="$2" + else + VIASH_PAR_SALMON_QUANT_RESULTS="$VIASH_PAR_SALMON_QUANT_RESULTS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_results. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_quant_results=*) + if [ -z "$VIASH_PAR_SALMON_QUANT_RESULTS" ]; then + VIASH_PAR_SALMON_QUANT_RESULTS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SALMON_QUANT_RESULTS="$VIASH_PAR_SALMON_QUANT_RESULTS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --kallisto_quant_results) + if [ -z "$VIASH_PAR_KALLISTO_QUANT_RESULTS" ]; then + VIASH_PAR_KALLISTO_QUANT_RESULTS="$2" + else + VIASH_PAR_KALLISTO_QUANT_RESULTS="$VIASH_PAR_KALLISTO_QUANT_RESULTS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_quant_results. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_quant_results=*) + if [ -z "$VIASH_PAR_KALLISTO_QUANT_RESULTS" ]; then + VIASH_PAR_KALLISTO_QUANT_RESULTS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_KALLISTO_QUANT_RESULTS="$VIASH_PAR_KALLISTO_QUANT_RESULTS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_extra_attributes) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_extra_attributes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_extra_attributes=*) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes=*\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_group_features) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_group_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_group_features=*) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features=*\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_type) + [ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_type=*) + [ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type=*\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --versions) + [ -n "$VIASH_PAR_VERSIONS" ] && ViashError Bad arguments for option \'--versions\': \'$VIASH_PAR_VERSIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERSIONS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --versions. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --versions=*) + [ -n "$VIASH_PAR_VERSIONS" ] && ViashError Bad arguments for option \'--versions=*\': \'$VIASH_PAR_VERSIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERSIONS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_gene) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_gene=*) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene=*\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene=*) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_length_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_length_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_transcript) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_transcript=*) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript=*\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lengths_gene) + [ -n "$VIASH_PAR_LENGTHS_GENE" ] && ViashError Bad arguments for option \'--lengths_gene\': \'$VIASH_PAR_LENGTHS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lengths_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lengths_gene=*) + [ -n "$VIASH_PAR_LENGTHS_GENE" ] && ViashError Bad arguments for option \'--lengths_gene=*\': \'$VIASH_PAR_LENGTHS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_transcript) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_transcript=*) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lengths_transcript) + [ -n "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--lengths_transcript\': \'$VIASH_PAR_LENGTHS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lengths_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lengths_transcript=*) + [ -n "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--lengths_transcript=*\': \'$VIASH_PAR_LENGTHS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LENGTHS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_merged_summarizedexperiment) + [ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_merged_summarizedexperiment. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_merged_summarizedexperiment=*) + [ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment=*\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_GTF_EXTRA_ATTRIBUTES+x} ]; then + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="gene_name" +fi +if [ -z ${VIASH_PAR_GTF_GROUP_FEATURES+x} ]; then + VIASH_PAR_GTF_GROUP_FEATURES="gene_id" +fi +if [ -z ${VIASH_PAR_QUANT_TYPE+x} ]; then + VIASH_PAR_QUANT_TYPE="salmon" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_SALMON_QUANT_RESULTS" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_SALMON_QUANT_RESULTS; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_KALLISTO_QUANT_RESULTS" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_KALLISTO_QUANT_RESULTS; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_QUANT_TYPE" ]; then + VIASH_PAR_QUANT_TYPE_CHOICES=("salmon;kallisto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_QUANT_TYPE_CHOICES[*]};" =~ ";$VIASH_PAR_QUANT_TYPE;" ]]; then + ViashError '--quant_type' specified value of \'$VIASH_PAR_QUANT_TYPE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_LENGTHS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_LENGTHS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_LENGTHS_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_LENGTHS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_LENGTHS_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_TX2GENE="$VIASH_META_RESOURCES_DIR/../../../nextflow/tx2gene/main.nf" +VIASH_DEP_TXIMPORT="$VIASH_META_RESOURCES_DIR/../../../nextflow/tximport/main.nf" +VIASH_DEP_SUMMARIZEDEXPERIMENT="$VIASH_META_RESOURCES_DIR/../../../nextflow/summarizedexperiment/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-merge_quant_results-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def quant_results = state.quant_type == 'kallisto' ? state.kallisto_quant_results : state.salmon_quant_results + [id, state + [quant_results: quant_results]] + } + + | tx2gene.run ( + fromState: [ + "quant_results": "quant_results", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf": "gtf", + "gtf_group_features": "gtf_group_features", + "quant_type": "quant_type" + ], + toState: [ "tx2gene_tsv": "tsv" ] + ) + + | tximport.run ( + fromState: [ + "quant_results": "quant_results", + "tx2gene_tsv": "tx2gene_tsv", + "quant_type": "quant_type" + ], + toState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "length_gene": "length_gene", + "length_transcript": "length_transcript" + ] + ) + + | summarizedexperiment.run ( + fromState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "tx2gene_tsv": "tx2gene_tsv" + ], + toState: [ "quant_merged_summarizedexperiment": "output" ] + ) + + | setState ( + [ "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" ] + ) + + emit: + output_ch +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -e "$VIASH_PAR_TPM_GENE" ]; then + ViashError "Output file '$VIASH_PAR_TPM_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_TPM_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_LENGTHS_GENE" ] && [ ! -e "$VIASH_PAR_LENGTHS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_LENGTHS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_LENGTHS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_LENGTHS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -e "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ]; then + ViashError "Output file '$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/workflows/merge_quant_results/nextflow_labels.config b/target/executable/workflows/merge_quant_results/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/merge_quant_results/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/post_processing/.config.vsh.yaml b/target/executable/workflows/post_processing/.config.vsh.yaml new file mode 100644 index 0000000..a730690 --- /dev/null +++ b/target/executable/workflows/post_processing/.config.vsh.yaml @@ -0,0 +1,528 @@ +name: "post_processing" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\ + \ or auto" + info: null + default: + - "auto" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "Paired fastq files or not?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "Path to FASTA index" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam" + description: "Genome BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chrom_sizes" + description: "File containing chromosome lengths" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_multiqc" + description: "STAR align log file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_picard_args" + description: "Extra arguments to pass to picard MarkDuplicates command in addition\ + \ to defaults defined by the pipeline." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_stringtie_args" + description: "Extra arguments to pass to stringtie command in addition to defaults\ + \ defined by the pipeline." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--stringtie_ignore_gtf" + description: "Perform reference-guided de novo assembly of transcripts using StringTie,\ + \ i.e. don't restrict to those in GTF file." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--bam_csi_index" + description: "Create a CSI index for BAM files instead of the traditional BAI\ + \ index. This will be required for genomes with larger chromosome sizes." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_mapped_reads" + description: "Minimum percentage of uniquely mapped reads below which samples\ + \ are removed from further processing." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_qc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_markduplicates" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_stringtie" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_bigwig" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--processed_genome_bam" + info: null + default: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + info: null + default: + - "$id.genome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + default: + - "$id.genome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + default: + - "$id.genome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + default: + - "$id.genome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--markduplicates_metrics" + info: null + default: + - "$id.MarkDuplicates.metrics.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_transcript_gtf" + info: null + default: + - "$id.stringtie.transcripts.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_coverage_gtf" + info: null + default: + - "$id.stringtie.coverage.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_abundance" + info: null + default: + - "$id.stringtie.gene_abundance.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_ballgown" + info: null + default: + - "$id.stringtie.ballgown" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_forward" + info: null + default: + - "$id.forward.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_reverse" + info: null + default: + - "$id.reverse.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_forward" + info: null + default: + - "$id.forward.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_reverse" + info: null + default: + - "$id.reverse.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash sub-workflow for the post-processing stage of nf-core/rnaseq\ + \ pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "picard_markduplicates" + repository: + type: "local" +- name: "samtools/samtools_sort" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_stats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_flagstat" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_idxstats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "stringtie" + repository: + type: "local" +- name: "bedtools/bedtools_genomecov" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "ucsc/bedclip" + repository: + type: "local" +- name: "ucsc/bedgraphtobigwig" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/post_processing/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/post_processing" + executable: "target/executable/workflows/post_processing/post_processing" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/picard_markduplicates" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats" + - "target/nextflow/stringtie" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov" + - "target/nextflow/ucsc/bedclip" + - "target/nextflow/ucsc/bedgraphtobigwig" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/post_processing/nextflow_labels.config b/target/executable/workflows/post_processing/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/post_processing/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/post_processing/post_processing b/target/executable/workflows/post_processing/post_processing new file mode 100755 index 0000000..eac0586 --- /dev/null +++ b/target/executable/workflows/post_processing/post_processing @@ -0,0 +1,1421 @@ +#!/usr/bin/env bash + +# post_processing v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="post_processing" +VIASH_META_FUNCTIONALITY_NAME="post_processing" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "post_processing v0.3.0" + echo "" + echo "A viash sub-workflow for the post-processing stage of nf-core/rnaseq pipeline." + echo "" + echo "Input:" + echo " --id" + echo " type: string, required parameter" + echo " example: foo" + echo " ID of the sample." + echo "" + echo " --strandedness" + echo " type: string" + echo " default: auto" + echo " Sample strand-specificity. Must be one of unstranded, forward, reverse" + echo " or auto" + echo "" + echo " --paired" + echo " type: boolean" + echo " Paired fastq files or not?" + echo "" + echo " --fasta" + echo " type: file, required parameter, file must exist" + echo " Path to FASTA genome file." + echo "" + echo " --fai" + echo " type: file, required parameter, file must exist" + echo " Path to FASTA index" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " GTF file" + echo "" + echo " --genome_bam" + echo " type: file, file must exist" + echo " Genome BAM file" + echo "" + echo " --chrom_sizes" + echo " type: file, file must exist" + echo " File containing chromosome lengths" + echo "" + echo " --star_multiqc" + echo " type: file, file must exist" + echo " STAR align log file." + echo "" + echo " --extra_picard_args" + echo " type: string" + echo " default:" + echo " Extra arguments to pass to picard MarkDuplicates command in addition to" + echo " defaults defined by the pipeline." + echo "" + echo " --extra_stringtie_args" + echo " type: string" + echo " default:" + echo " Extra arguments to pass to stringtie command in addition to defaults" + echo " defined by the pipeline." + echo "" + echo " --stringtie_ignore_gtf" + echo " type: boolean" + echo " Perform reference-guided de novo assembly of transcripts using" + echo " StringTie, i.e. don't restrict to those in GTF file." + echo "" + echo " --bam_csi_index" + echo " type: boolean" + echo " default: false" + echo " Create a CSI index for BAM files instead of the traditional BAI index." + echo " This will be required for genomes with larger chromosome sizes." + echo "" + echo " --min_mapped_reads" + echo " type: integer" + echo " Minimum percentage of uniquely mapped reads below which samples are" + echo " removed from further processing." + echo "" + echo " --with_umi" + echo " type: boolean" + echo " default: false" + echo " Enable UMI-based read deduplication." + echo "" + echo " --skip_qc" + echo " type: boolean" + echo "" + echo " --skip_markduplicates" + echo " type: boolean" + echo "" + echo " --skip_stringtie" + echo " type: boolean" + echo "" + echo " --skip_bigwig" + echo " type: boolean" + echo "" + echo "Output:" + echo " --processed_genome_bam" + echo " type: file, output, file must exist" + echo " default: \$id.genome.bam" + echo "" + echo " --genome_bam_index" + echo " type: file, output, file must exist" + echo " default: \$id.genome.bam.bai" + echo "" + echo " --genome_bam_stats" + echo " type: file, output, file must exist" + echo " default: \$id.genome.stats" + echo "" + echo " --genome_bam_flagstat" + echo " type: file, output, file must exist" + echo " default: \$id.genome.flagstat" + echo "" + echo " --genome_bam_idxstats" + echo " type: file, output, file must exist" + echo " default: \$id.genome.idxstats" + echo "" + echo " --markduplicates_metrics" + echo " type: file, output, file must exist" + echo " default: \$id.MarkDuplicates.metrics.txt" + echo "" + echo " --stringtie_transcript_gtf" + echo " type: file, output, file must exist" + echo " default: \$id.stringtie.transcripts.gtf" + echo "" + echo " --stringtie_coverage_gtf" + echo " type: file, output, file must exist" + echo " default: \$id.stringtie.coverage.gtf" + echo "" + echo " --stringtie_abundance" + echo " type: file, output, file must exist" + echo " default: \$id.stringtie.gene_abundance.txt" + echo "" + echo " --stringtie_ballgown" + echo " type: file, output, file must exist" + echo " default: \$id.stringtie.ballgown" + echo "" + echo " --bedgraph_forward" + echo " type: file, output, file must exist" + echo " default: \$id.forward.bedgraph" + echo "" + echo " --bedgraph_reverse" + echo " type: file, output, file must exist" + echo " default: \$id.reverse.bedgraph" + echo "" + echo " --bigwig_forward" + echo " type: file, output, file must exist" + echo " default: \$id.forward.bigwig" + echo "" + echo " --bigwig_reverse" + echo " type: file, output, file must exist" + echo " default: \$id.reverse.bigwig" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "post_processing v0.3.0" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --paired) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --paired=*) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fasta) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta=*) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta=*\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fai) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fai. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fai=*) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai=*\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam) + [ -n "$VIASH_PAR_GENOME_BAM" ] && ViashError Bad arguments for option \'--genome_bam\': \'$VIASH_PAR_GENOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam=*) + [ -n "$VIASH_PAR_GENOME_BAM" ] && ViashError Bad arguments for option \'--genome_bam=*\': \'$VIASH_PAR_GENOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chrom_sizes) + [ -n "$VIASH_PAR_CHROM_SIZES" ] && ViashError Bad arguments for option \'--chrom_sizes\': \'$VIASH_PAR_CHROM_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHROM_SIZES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chrom_sizes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chrom_sizes=*) + [ -n "$VIASH_PAR_CHROM_SIZES" ] && ViashError Bad arguments for option \'--chrom_sizes=*\': \'$VIASH_PAR_CHROM_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHROM_SIZES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_multiqc) + [ -n "$VIASH_PAR_STAR_MULTIQC" ] && ViashError Bad arguments for option \'--star_multiqc\': \'$VIASH_PAR_STAR_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_multiqc=*) + [ -n "$VIASH_PAR_STAR_MULTIQC" ] && ViashError Bad arguments for option \'--star_multiqc=*\': \'$VIASH_PAR_STAR_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_picard_args) + [ -n "$VIASH_PAR_EXTRA_PICARD_ARGS" ] && ViashError Bad arguments for option \'--extra_picard_args\': \'$VIASH_PAR_EXTRA_PICARD_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PICARD_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_picard_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_picard_args=*) + [ -n "$VIASH_PAR_EXTRA_PICARD_ARGS" ] && ViashError Bad arguments for option \'--extra_picard_args=*\': \'$VIASH_PAR_EXTRA_PICARD_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PICARD_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_stringtie_args) + [ -n "$VIASH_PAR_EXTRA_STRINGTIE_ARGS" ] && ViashError Bad arguments for option \'--extra_stringtie_args\': \'$VIASH_PAR_EXTRA_STRINGTIE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_STRINGTIE_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_stringtie_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_stringtie_args=*) + [ -n "$VIASH_PAR_EXTRA_STRINGTIE_ARGS" ] && ViashError Bad arguments for option \'--extra_stringtie_args=*\': \'$VIASH_PAR_EXTRA_STRINGTIE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_STRINGTIE_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_ignore_gtf) + [ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ] && ViashError Bad arguments for option \'--stringtie_ignore_gtf\': \'$VIASH_PAR_STRINGTIE_IGNORE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_IGNORE_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_ignore_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_ignore_gtf=*) + [ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ] && ViashError Bad arguments for option \'--stringtie_ignore_gtf=*\': \'$VIASH_PAR_STRINGTIE_IGNORE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_IGNORE_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_csi_index) + [ -n "$VIASH_PAR_BAM_CSI_INDEX" ] && ViashError Bad arguments for option \'--bam_csi_index\': \'$VIASH_PAR_BAM_CSI_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_CSI_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_csi_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_csi_index=*) + [ -n "$VIASH_PAR_BAM_CSI_INDEX" ] && ViashError Bad arguments for option \'--bam_csi_index=*\': \'$VIASH_PAR_BAM_CSI_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_CSI_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_mapped_reads) + [ -n "$VIASH_PAR_MIN_MAPPED_READS" ] && ViashError Bad arguments for option \'--min_mapped_reads\': \'$VIASH_PAR_MIN_MAPPED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_MAPPED_READS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_mapped_reads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_mapped_reads=*) + [ -n "$VIASH_PAR_MIN_MAPPED_READS" ] && ViashError Bad arguments for option \'--min_mapped_reads=*\': \'$VIASH_PAR_MIN_MAPPED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_MAPPED_READS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --with_umi) + [ -n "$VIASH_PAR_WITH_UMI" ] && ViashError Bad arguments for option \'--with_umi\': \'$VIASH_PAR_WITH_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITH_UMI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --with_umi. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --with_umi=*) + [ -n "$VIASH_PAR_WITH_UMI" ] && ViashError Bad arguments for option \'--with_umi=*\': \'$VIASH_PAR_WITH_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITH_UMI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_qc) + [ -n "$VIASH_PAR_SKIP_QC" ] && ViashError Bad arguments for option \'--skip_qc\': \'$VIASH_PAR_SKIP_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_qc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_qc=*) + [ -n "$VIASH_PAR_SKIP_QC" ] && ViashError Bad arguments for option \'--skip_qc=*\': \'$VIASH_PAR_SKIP_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_markduplicates) + [ -n "$VIASH_PAR_SKIP_MARKDUPLICATES" ] && ViashError Bad arguments for option \'--skip_markduplicates\': \'$VIASH_PAR_SKIP_MARKDUPLICATES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_MARKDUPLICATES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_markduplicates. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_markduplicates=*) + [ -n "$VIASH_PAR_SKIP_MARKDUPLICATES" ] && ViashError Bad arguments for option \'--skip_markduplicates=*\': \'$VIASH_PAR_SKIP_MARKDUPLICATES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_MARKDUPLICATES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_stringtie) + [ -n "$VIASH_PAR_SKIP_STRINGTIE" ] && ViashError Bad arguments for option \'--skip_stringtie\': \'$VIASH_PAR_SKIP_STRINGTIE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_STRINGTIE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_stringtie. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_stringtie=*) + [ -n "$VIASH_PAR_SKIP_STRINGTIE" ] && ViashError Bad arguments for option \'--skip_stringtie=*\': \'$VIASH_PAR_SKIP_STRINGTIE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_STRINGTIE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_bigwig) + [ -n "$VIASH_PAR_SKIP_BIGWIG" ] && ViashError Bad arguments for option \'--skip_bigwig\': \'$VIASH_PAR_SKIP_BIGWIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BIGWIG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_bigwig. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_bigwig=*) + [ -n "$VIASH_PAR_SKIP_BIGWIG" ] && ViashError Bad arguments for option \'--skip_bigwig=*\': \'$VIASH_PAR_SKIP_BIGWIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BIGWIG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --processed_genome_bam) + [ -n "$VIASH_PAR_PROCESSED_GENOME_BAM" ] && ViashError Bad arguments for option \'--processed_genome_bam\': \'$VIASH_PAR_PROCESSED_GENOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PROCESSED_GENOME_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --processed_genome_bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --processed_genome_bam=*) + [ -n "$VIASH_PAR_PROCESSED_GENOME_BAM" ] && ViashError Bad arguments for option \'--processed_genome_bam=*\': \'$VIASH_PAR_PROCESSED_GENOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PROCESSED_GENOME_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_index) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_index=*) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index=*\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_stats) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_stats=*) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats=*\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_flagstat) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_flagstat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_flagstat=*) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat=*\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_idxstats) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_idxstats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_idxstats=*) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats=*\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --markduplicates_metrics) + [ -n "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && ViashError Bad arguments for option \'--markduplicates_metrics\': \'$VIASH_PAR_MARKDUPLICATES_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MARKDUPLICATES_METRICS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --markduplicates_metrics. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --markduplicates_metrics=*) + [ -n "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && ViashError Bad arguments for option \'--markduplicates_metrics=*\': \'$VIASH_PAR_MARKDUPLICATES_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MARKDUPLICATES_METRICS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_transcript_gtf) + [ -n "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && ViashError Bad arguments for option \'--stringtie_transcript_gtf\': \'$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_transcript_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_transcript_gtf=*) + [ -n "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && ViashError Bad arguments for option \'--stringtie_transcript_gtf=*\': \'$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_coverage_gtf) + [ -n "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && ViashError Bad arguments for option \'--stringtie_coverage_gtf\': \'$VIASH_PAR_STRINGTIE_COVERAGE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_COVERAGE_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_coverage_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_coverage_gtf=*) + [ -n "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && ViashError Bad arguments for option \'--stringtie_coverage_gtf=*\': \'$VIASH_PAR_STRINGTIE_COVERAGE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_COVERAGE_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_abundance) + [ -n "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && ViashError Bad arguments for option \'--stringtie_abundance\': \'$VIASH_PAR_STRINGTIE_ABUNDANCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_ABUNDANCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_abundance. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_abundance=*) + [ -n "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && ViashError Bad arguments for option \'--stringtie_abundance=*\': \'$VIASH_PAR_STRINGTIE_ABUNDANCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_ABUNDANCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_ballgown) + [ -n "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && ViashError Bad arguments for option \'--stringtie_ballgown\': \'$VIASH_PAR_STRINGTIE_BALLGOWN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_BALLGOWN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_ballgown. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_ballgown=*) + [ -n "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && ViashError Bad arguments for option \'--stringtie_ballgown=*\': \'$VIASH_PAR_STRINGTIE_BALLGOWN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_BALLGOWN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bedgraph_forward) + [ -n "$VIASH_PAR_BEDGRAPH_FORWARD" ] && ViashError Bad arguments for option \'--bedgraph_forward\': \'$VIASH_PAR_BEDGRAPH_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_FORWARD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bedgraph_forward. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bedgraph_forward=*) + [ -n "$VIASH_PAR_BEDGRAPH_FORWARD" ] && ViashError Bad arguments for option \'--bedgraph_forward=*\': \'$VIASH_PAR_BEDGRAPH_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_FORWARD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bedgraph_reverse) + [ -n "$VIASH_PAR_BEDGRAPH_REVERSE" ] && ViashError Bad arguments for option \'--bedgraph_reverse\': \'$VIASH_PAR_BEDGRAPH_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_REVERSE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bedgraph_reverse. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bedgraph_reverse=*) + [ -n "$VIASH_PAR_BEDGRAPH_REVERSE" ] && ViashError Bad arguments for option \'--bedgraph_reverse=*\': \'$VIASH_PAR_BEDGRAPH_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_REVERSE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bigwig_forward) + [ -n "$VIASH_PAR_BIGWIG_FORWARD" ] && ViashError Bad arguments for option \'--bigwig_forward\': \'$VIASH_PAR_BIGWIG_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_FORWARD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bigwig_forward. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bigwig_forward=*) + [ -n "$VIASH_PAR_BIGWIG_FORWARD" ] && ViashError Bad arguments for option \'--bigwig_forward=*\': \'$VIASH_PAR_BIGWIG_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_FORWARD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bigwig_reverse) + [ -n "$VIASH_PAR_BIGWIG_REVERSE" ] && ViashError Bad arguments for option \'--bigwig_reverse\': \'$VIASH_PAR_BIGWIG_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_REVERSE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bigwig_reverse. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bigwig_reverse=*) + [ -n "$VIASH_PAR_BIGWIG_REVERSE" ] && ViashError Bad arguments for option \'--bigwig_reverse=*\': \'$VIASH_PAR_BIGWIG_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_REVERSE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_ID+x} ]; then + ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_FASTA+x} ]; then + ViashError '--fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_FAI+x} ]; then + ViashError '--fai' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_STRANDEDNESS+x} ]; then + VIASH_PAR_STRANDEDNESS="auto" +fi +if [ -z ${VIASH_PAR_EXTRA_PICARD_ARGS+x} ]; then + VIASH_PAR_EXTRA_PICARD_ARGS="" +fi +if [ -z ${VIASH_PAR_EXTRA_STRINGTIE_ARGS+x} ]; then + VIASH_PAR_EXTRA_STRINGTIE_ARGS="" +fi +if [ -z ${VIASH_PAR_BAM_CSI_INDEX+x} ]; then + VIASH_PAR_BAM_CSI_INDEX="false" +fi +if [ -z ${VIASH_PAR_WITH_UMI+x} ]; then + VIASH_PAR_WITH_UMI="false" +fi +if [ -z ${VIASH_PAR_PROCESSED_GENOME_BAM+x} ]; then + VIASH_PAR_PROCESSED_GENOME_BAM="\$id.genome.bam" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_INDEX+x} ]; then + VIASH_PAR_GENOME_BAM_INDEX="\$id.genome.bam.bai" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_STATS+x} ]; then + VIASH_PAR_GENOME_BAM_STATS="\$id.genome.stats" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_FLAGSTAT+x} ]; then + VIASH_PAR_GENOME_BAM_FLAGSTAT="\$id.genome.flagstat" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_IDXSTATS+x} ]; then + VIASH_PAR_GENOME_BAM_IDXSTATS="\$id.genome.idxstats" +fi +if [ -z ${VIASH_PAR_MARKDUPLICATES_METRICS+x} ]; then + VIASH_PAR_MARKDUPLICATES_METRICS="\$id.MarkDuplicates.metrics.txt" +fi +if [ -z ${VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF+x} ]; then + VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF="\$id.stringtie.transcripts.gtf" +fi +if [ -z ${VIASH_PAR_STRINGTIE_COVERAGE_GTF+x} ]; then + VIASH_PAR_STRINGTIE_COVERAGE_GTF="\$id.stringtie.coverage.gtf" +fi +if [ -z ${VIASH_PAR_STRINGTIE_ABUNDANCE+x} ]; then + VIASH_PAR_STRINGTIE_ABUNDANCE="\$id.stringtie.gene_abundance.txt" +fi +if [ -z ${VIASH_PAR_STRINGTIE_BALLGOWN+x} ]; then + VIASH_PAR_STRINGTIE_BALLGOWN="\$id.stringtie.ballgown" +fi +if [ -z ${VIASH_PAR_BEDGRAPH_FORWARD+x} ]; then + VIASH_PAR_BEDGRAPH_FORWARD="\$id.forward.bedgraph" +fi +if [ -z ${VIASH_PAR_BEDGRAPH_REVERSE+x} ]; then + VIASH_PAR_BEDGRAPH_REVERSE="\$id.reverse.bedgraph" +fi +if [ -z ${VIASH_PAR_BIGWIG_FORWARD+x} ]; then + VIASH_PAR_BIGWIG_FORWARD="\$id.forward.bigwig" +fi +if [ -z ${VIASH_PAR_BIGWIG_REVERSE+x} ]; then + VIASH_PAR_BIGWIG_REVERSE="\$id.reverse.bigwig" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTA" ] && [ ! -e "$VIASH_PAR_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FAI" ] && [ ! -e "$VIASH_PAR_FAI" ]; then + ViashError "Input file '$VIASH_PAR_FAI' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM" ] && [ ! -e "$VIASH_PAR_GENOME_BAM" ]; then + ViashError "Input file '$VIASH_PAR_GENOME_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_CHROM_SIZES" ] && [ ! -e "$VIASH_PAR_CHROM_SIZES" ]; then + ViashError "Input file '$VIASH_PAR_CHROM_SIZES' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STAR_MULTIQC" ] && [ ! -e "$VIASH_PAR_STAR_MULTIQC" ]; then + ViashError "Input file '$VIASH_PAR_STAR_MULTIQC' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_PAIRED" ]]; then + if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ]]; then + if ! [[ "$VIASH_PAR_STRINGTIE_IGNORE_GTF" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--stringtie_ignore_gtf' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_BAM_CSI_INDEX" ]]; then + if ! [[ "$VIASH_PAR_BAM_CSI_INDEX" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--bam_csi_index' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_MAPPED_READS" ]]; then + if ! [[ "$VIASH_PAR_MIN_MAPPED_READS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_mapped_reads' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WITH_UMI" ]]; then + if ! [[ "$VIASH_PAR_WITH_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--with_umi' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_QC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_qc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_MARKDUPLICATES" ]]; then + if ! [[ "$VIASH_PAR_SKIP_MARKDUPLICATES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_markduplicates' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_STRINGTIE" ]]; then + if ! [[ "$VIASH_PAR_SKIP_STRINGTIE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_stringtie' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_BIGWIG" ]]; then + if ! [[ "$VIASH_PAR_SKIP_BIGWIG" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_bigwig' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_PROCESSED_GENOME_BAM" ] && [ ! -d "$(dirname "$VIASH_PAR_PROCESSED_GENOME_BAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PROCESSED_GENOME_BAM")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_INDEX")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_STATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_STATS")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_FLAGSTAT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_FLAGSTAT")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_IDXSTATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_IDXSTATS")" +fi +if [ ! -z "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && [ ! -d "$(dirname "$VIASH_PAR_MARKDUPLICATES_METRICS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MARKDUPLICATES_METRICS")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_COVERAGE_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_COVERAGE_GTF")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_ABUNDANCE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_ABUNDANCE")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_BALLGOWN")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_BALLGOWN")" +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ] && [ ! -d "$(dirname "$VIASH_PAR_BEDGRAPH_FORWARD")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BEDGRAPH_FORWARD")" +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ] && [ ! -d "$(dirname "$VIASH_PAR_BEDGRAPH_REVERSE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BEDGRAPH_REVERSE")" +fi +if [ ! -z "$VIASH_PAR_BIGWIG_FORWARD" ] && [ ! -d "$(dirname "$VIASH_PAR_BIGWIG_FORWARD")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BIGWIG_FORWARD")" +fi +if [ ! -z "$VIASH_PAR_BIGWIG_REVERSE" ] && [ ! -d "$(dirname "$VIASH_PAR_BIGWIG_REVERSE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BIGWIG_REVERSE")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_PICARD_MARKDUPLICATES="$VIASH_META_RESOURCES_DIR/../../../nextflow/picard_markduplicates/main.nf" +VIASH_DEP_STRINGTIE="$VIASH_META_RESOURCES_DIR/../../../nextflow/stringtie/main.nf" +VIASH_DEP_UCSC_BEDCLIP="$VIASH_META_RESOURCES_DIR/../../../nextflow/ucsc/bedclip/main.nf" +VIASH_DEP_UCSC_BEDGRAPHTOBIGWIG="$VIASH_META_RESOURCES_DIR/../../../nextflow/ucsc/bedgraphtobigwig/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_SORT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_STATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_FLAGSTAT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf" +VIASH_DEP_SAMTOOLS_SAMTOOLS_IDXSTATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf" +VIASH_DEP_BEDTOOLS_BEDTOOLS_GENOMECOV="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-post_processing-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +// Note: some helper functionality is added at the end of this file + +workflow run_wf { + take: + input_ch + + main: + + output_ch = input_ch + + | picard_markduplicates.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "genome_bam", + "fasta": "fasta", + "fai": "fai", + "extra_picard_args": "extra_picard_args" + ], + toState: [ + "processed_genome_bam": "output_bam", + "markduplicates_metrics": "metrics" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_sort.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ "input": "processed_genome_bam" ], + toState: [ "processed_genome_bam": "output" ], + key: "genome_sorted_MarkDuplicates", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_index.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "input": "processed_genome_bam", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted_MarkDuplicates", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_stats.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "input": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_stats": "output" ], + key: "MarkDuplicates_stats", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_flagstat.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "MarkDuplicates_flagstat", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "MarkDuplicates_idxstats", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | stringtie.run ( + runIf: { id, state -> !state.skip_stringtie }, + fromState: [ + "strandedness": "strandedness", + "bam": "processed_genome_bam", + "annotation_gtf": "gtf", + "extra_stringtie_args": "extra_stringtie_args" + ], + toState: [ + "stringtie_transcript_gtf": "transcript_gtf", + "stringtie_coverage_gtf": "coverage_gtf", + "stringtie_abundance": "abundance", + "stringtie_ballgown": "ballgown" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Genome-wide coverage with BEDTools + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "+" + ], + toState: [ "bedgraph_forward": "output" ], + key: "bedtools_genomecov_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "-" + ], + toState: [ "bedgraph_reverse": "output" ], + key: "bedtools_genomecov_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedclip.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bedgraph": "bedgraph_forward", + "sizes": "chrom_sizes" + ], + toState: [ "bedgraph_forward": "output_bedgraph" ], + key: "bedclip_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedgraphtobigwig.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "bedgraph": "bedgraph_forward", + "sizes": "chrom_sizes" + ], + toState: [ "bigwig_forward": "bigwig" ], + key: "bedgraphtobigwig_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedclip.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bedgraph": "bedgraph_reverse", + "sizes": "chrom_sizes", + ], + toState: [ "bedgraph_reverse": "output_bedgraph" ], + key: "bedclip_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedgraphtobigwig.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "bedgraph": "bedgraph_reverse", + "sizes": "chrom_sizes" + ], + toState: [ "bigwig_reverse": "bigwig" ], + key: "bedgraphtobigwig_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "processed_genome_bam": "processed_genome_bam", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse" + ) + + emit: + output_ch +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_PROCESSED_GENOME_BAM" ] && [ ! -e "$VIASH_PAR_PROCESSED_GENOME_BAM" ]; then + ViashError "Output file '$VIASH_PAR_PROCESSED_GENOME_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_INDEX" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_STATS" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_STATS" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_STATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_FLAGSTAT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_IDXSTATS" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_IDXSTATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && [ ! -e "$VIASH_PAR_MARKDUPLICATES_METRICS" ]; then + ViashError "Output file '$VIASH_PAR_MARKDUPLICATES_METRICS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && [ ! -e "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && [ ! -e "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_COVERAGE_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && [ ! -e "$VIASH_PAR_STRINGTIE_ABUNDANCE" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_ABUNDANCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && [ ! -e "$VIASH_PAR_STRINGTIE_BALLGOWN" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_BALLGOWN' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ] && [ ! -e "$VIASH_PAR_BEDGRAPH_FORWARD" ]; then + ViashError "Output file '$VIASH_PAR_BEDGRAPH_FORWARD' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ] && [ ! -e "$VIASH_PAR_BEDGRAPH_REVERSE" ]; then + ViashError "Output file '$VIASH_PAR_BEDGRAPH_REVERSE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BIGWIG_FORWARD" ] && [ ! -e "$VIASH_PAR_BIGWIG_FORWARD" ]; then + ViashError "Output file '$VIASH_PAR_BIGWIG_FORWARD' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BIGWIG_REVERSE" ] && [ ! -e "$VIASH_PAR_BIGWIG_REVERSE" ]; then + ViashError "Output file '$VIASH_PAR_BIGWIG_REVERSE' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/workflows/pre_processing/.config.vsh.yaml b/target/executable/workflows/pre_processing/.config.vsh.yaml new file mode 100644 index 0000000..423963a --- /dev/null +++ b/target/executable/workflows/pre_processing/.config.vsh.yaml @@ -0,0 +1,681 @@ +name: "pre_processing" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Inputs" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\ + \ or auto" + info: null + default: + - "auto" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index" + description: "BBsplit index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Path to FASTA transcriptome file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory containing the Salmon index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_trimmed_reads" + description: "Number of reads after trimming" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Extra pipeline options" + arguments: + - type: "boolean" + name: "--skip_qc" + description: "Skip QC steps of the workflow." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "FastQC options" + arguments: + - type: "boolean" + name: "--skip_fastqc" + description: "Skip FatQC step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "UMI-tools options" + arguments: + - type: "boolean" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_umi_extract" + description: "Skip umi_tools extract step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_extract_method" + description: "UMI pattern to use." + info: null + default: + - "string" + required: false + choices: + - "string" + - "regex" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern" + description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\ + \ first 6 nucleotides of the read are from the UMI." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern2" + description: "The UMI barcode pattern to use if the UMI is located in read 2." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--umi_discard_read" + description: "After UMI barcode extraction discard either R1 or R2 by setting\ + \ this parameter to 1 or 2, respectively." + info: null + default: + - 0 + required: false + choices: + - 0 + - 1 + - 2 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_umi_separator" + description: "The character that separates the UMI in the read name. Most likely\ + \ a colon if you skipped the extraction with UMI-tools and used other software." + info: null + default: + - "_" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_grouping_method" + description: "Method to use to determine read groups by subsuming those with similar\ + \ UMIs. All methods start by identifying the reads with the same mapping position,\ + \ but treat similar yet nonidentical UMIs differently." + info: null + default: + - "directional" + required: false + choices: + - "unique" + - "percentile" + - "cluster" + - "adjacency" + - "directional" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--save_umi_intermeds" + description: "If this option is specified, intermediate FastQ and BAM files produced\ + \ by UMI-tools are also saved in the results directory." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read trimming options" + arguments: + - type: "string" + name: "--trimmer" + description: "Specify the trimming tool to use." + info: null + default: + - "trimgalore" + required: false + choices: + - "trimgalore" + - "fastp" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_trimmed_reads" + description: "Minimum number of trimmed reads below which samples are removed\ + \ from further processing. Some downstream steps in the pipeline will fail if\ + \ this threshold is too low." + info: null + default: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_trimming" + description: "Skip the adapter trimming step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--save_trimmed" + description: "Save the trimmed FastQ files in the results directory." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read filtering options" + arguments: + - type: "boolean_true" + name: "--skip_bbsplit" + description: "Skip BBSplit for removal of non-reference genome reads." + info: null + direction: "input" + - type: "boolean_true" + name: "--remove_ribo_rna" + description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--qc_output1" + description: "Path to output directory" + info: null + default: + - "${id}_r1.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qc_output2" + description: "Path to output directory" + info: null + default: + - "${id}_r2.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_1" + description: "FastQC HTML report for read 1." + info: null + default: + - "${id}_r1.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_2" + description: "FastQC HTML report for read 2." + info: null + default: + - "${id}_r2.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_1" + description: "FastQC report archive for read 1." + info: null + default: + - "${id}_r1.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_2" + description: "FastQC report archive for read 2." + info: null + default: + - "${id}_r2.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_1" + info: null + default: + - "${id}_r1.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_2" + info: null + default: + - "${id}_r2.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_1" + info: null + default: + - "${id}_r1.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_2" + info: null + default: + - "${id}_r2.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_1" + info: null + default: + - "${id}_r1.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_2" + info: null + default: + - "${id}_r2.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sortmerna_log" + description: "Sortmerna log file." + info: null + default: + - "$id.sortmerna.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_output" + description: "Results from Salmon quant" + info: null + default: + - "$id.salmon_quant_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_json" + description: "The fastp json format report file name" + info: null + default: + - "$id.fastp_out.json" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html" + description: "The fastp html format report file name" + info: null + default: + - "$id.fastp_out.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_out" + description: "File name to store merged fastp output." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "fastqc" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "umi_tools/umi_tools_extract" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "trimgalore" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "bbmap/bbmap_bbsplit" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "sortmerna" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "fastp" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "fq_subsample" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "salmon/salmon_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/pre_processing/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/pre_processing" + executable: "target/executable/workflows/pre_processing/pre_processing" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/pre_processing/nextflow_labels.config b/target/executable/workflows/pre_processing/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/pre_processing/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/pre_processing/pre_processing b/target/executable/workflows/pre_processing/pre_processing new file mode 100755 index 0000000..c06bfe9 --- /dev/null +++ b/target/executable/workflows/pre_processing/pre_processing @@ -0,0 +1,1779 @@ +#!/usr/bin/env bash + +# pre_processing v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="pre_processing" +VIASH_META_FUNCTIONALITY_NAME="pre_processing" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "pre_processing v0.3.0" + echo "" + echo "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline." + echo "" + echo "Inputs:" + echo " --id" + echo " type: string, required parameter" + echo " example: foo" + echo " ID of the sample." + echo "" + echo " --fastq_1" + echo " type: file, required parameter, file must exist" + echo " example: input.fastq.gz" + echo " Path to the sample (or read 1 of paired end sample)." + echo "" + echo " --fastq_2" + echo " type: file" + echo " Path to read 2 of the sample." + echo "" + echo " --strandedness" + echo " type: string" + echo " default: auto" + echo " Sample strand-specificity. Must be one of unstranded, forward, reverse" + echo " or auto" + echo "" + echo " --bbsplit_index" + echo " type: file, file must exist" + echo " BBsplit index" + echo "" + echo " --ribo_database_manifest" + echo " type: file, file must exist" + echo " Text file containing paths to fasta files (one per line) that will be" + echo " used to create the database for SortMeRNA." + echo "" + echo " --transcript_fasta" + echo " type: file, file must exist" + echo " Path to FASTA transcriptome file." + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " Path to GTF annotation file." + echo "" + echo " --salmon_index" + echo " type: file, file must exist" + echo " Path to directory containing the Salmon index" + echo "" + echo " --num_trimmed_reads" + echo " type: integer" + echo " Number of reads after trimming" + echo "" + echo "Extra pipeline options:" + echo " --skip_qc" + echo " type: boolean" + echo " Skip QC steps of the workflow." + echo "" + echo "FastQC options:" + echo " --skip_fastqc" + echo " type: boolean" + echo " default: false" + echo " Skip FatQC step." + echo "" + echo "UMI-tools options:" + echo " --with_umi" + echo " type: boolean" + echo " default: false" + echo " Enable UMI-based read deduplication." + echo "" + echo " --skip_umi_extract" + echo " type: boolean" + echo " default: false" + echo " Skip umi_tools extract step." + echo "" + echo " --umitools_extract_method" + echo " type: string" + echo " default: string" + echo " choices: [ string, regex ]" + echo " UMI pattern to use." + echo "" + echo " --umitools_bc_pattern" + echo " type: string" + echo " default:" + echo " The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6" + echo " nucleotides of the read are from the UMI." + echo "" + echo " --umitools_bc_pattern2" + echo " type: string" + echo " default:" + echo " The UMI barcode pattern to use if the UMI is located in read 2." + echo "" + echo " --umi_discard_read" + echo " type: integer" + echo " default: 0" + echo " choices: [ 0, 1, 2 ]" + echo " After UMI barcode extraction discard either R1 or R2 by setting this" + echo " parameter to 1 or 2, respectively." + echo "" + echo " --umitools_umi_separator" + echo " type: string" + echo " default: _" + echo " The character that separates the UMI in the read name. Most likely a" + echo " colon if you skipped the extraction with UMI-tools and used other" + echo " software." + echo "" + echo " --umitools_grouping_method" + echo " type: string" + echo " default: directional" + echo " choices: [ unique, percentile, cluster, adjacency, directional ]" + echo " Method to use to determine read groups by subsuming those with similar" + echo " UMIs. All methods start by identifying the reads with the same mapping" + echo " position, but treat similar yet nonidentical UMIs differently." + echo "" + echo " --save_umi_intermeds" + echo " type: boolean" + echo " default: false" + echo " If this option is specified, intermediate FastQ and BAM files produced" + echo " by UMI-tools are also saved in the results directory." + echo "" + echo "Read trimming options:" + echo " --trimmer" + echo " type: string" + echo " default: trimgalore" + echo " choices: [ trimgalore, fastp ]" + echo " Specify the trimming tool to use." + echo "" + echo " --min_trimmed_reads" + echo " type: integer" + echo " default: 10000" + echo " Minimum number of trimmed reads below which samples are removed from" + echo " further processing. Some downstream steps in the pipeline will fail if" + echo " this threshold is too low." + echo "" + echo " --skip_trimming" + echo " type: boolean" + echo " default: false" + echo " Skip the adapter trimming step." + echo "" + echo " --save_trimmed" + echo " type: boolean" + echo " default: false" + echo " Save the trimmed FastQ files in the results directory." + echo "" + echo "Read filtering options:" + echo " --skip_bbsplit" + echo " type: boolean_true" + echo " Skip BBSplit for removal of non-reference genome reads." + echo "" + echo " --remove_ribo_rna" + echo " type: boolean_true" + echo " Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + echo "" + echo "Output:" + echo " --qc_output1" + echo " type: file, output" + echo " default: \${id}_r1.fastq.gz" + echo " Path to output directory" + echo "" + echo " --qc_output2" + echo " type: file, output" + echo " default: \${id}_r2.fastq.gz" + echo " Path to output directory" + echo "" + echo " --fastqc_html_1" + echo " type: file, output" + echo " default: \${id}_r1.fastqc.html" + echo " FastQC HTML report for read 1." + echo "" + echo " --fastqc_html_2" + echo " type: file, output" + echo " default: \${id}_r2.fastqc.html" + echo " FastQC HTML report for read 2." + echo "" + echo " --fastqc_zip_1" + echo " type: file, output" + echo " default: \${id}_r1.fastqc.zip" + echo " FastQC report archive for read 1." + echo "" + echo " --fastqc_zip_2" + echo " type: file, output" + echo " default: \${id}_r2.fastqc.zip" + echo " FastQC report archive for read 2." + echo "" + echo " --trim_log_1" + echo " type: file, output" + echo " default: \${id}_r1.trimming_report.txt" + echo "" + echo " --trim_log_2" + echo " type: file, output" + echo " default: \${id}_r2.trimming_report.txt" + echo "" + echo " --trim_html_1" + echo " type: file, output" + echo " default: \${id}_r1.trimmed_fastqc.html" + echo "" + echo " --trim_html_2" + echo " type: file, output" + echo " default: \${id}_r2.trimmed_fastqc.html" + echo "" + echo " --trim_zip_1" + echo " type: file, output" + echo " default: \${id}_r1.trimmed_fastqc.zip" + echo "" + echo " --trim_zip_2" + echo " type: file, output" + echo " default: \${id}_r2.trimmed_fastqc.zip" + echo "" + echo " --sortmerna_log" + echo " type: file, output" + echo " default: \$id.sortmerna.log" + echo " Sortmerna log file." + echo "" + echo " --salmon_quant_output" + echo " type: file, output, file must exist" + echo " default: \$id.salmon_quant_output" + echo " Results from Salmon quant" + echo "" + echo " --trim_json" + echo " type: file, output, file must exist" + echo " default: \$id.fastp_out.json" + echo " The fastp json format report file name" + echo "" + echo " --trim_html" + echo " type: file, output, file must exist" + echo " default: \$id.fastp_out.html" + echo " The fastp html format report file name" + echo "" + echo " --merged_out" + echo " type: file, output, file must exist" + echo " File name to store merged fastp output." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "pre_processing v0.3.0" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_1) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_1=*) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1=*\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_2) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2=*) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2=*\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bbsplit_index) + [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bbsplit_index=*) + [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index=*\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --ribo_database_manifest) + [ -n "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && ViashError Bad arguments for option \'--ribo_database_manifest\': \'$VIASH_PAR_RIBO_DATABASE_MANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RIBO_DATABASE_MANIFEST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --ribo_database_manifest. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --ribo_database_manifest=*) + [ -n "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && ViashError Bad arguments for option \'--ribo_database_manifest=*\': \'$VIASH_PAR_RIBO_DATABASE_MANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RIBO_DATABASE_MANIFEST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcript_fasta) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_fasta=*) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta=*\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_index) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_index=*) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index=*\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --num_trimmed_reads) + [ -n "$VIASH_PAR_NUM_TRIMMED_READS" ] && ViashError Bad arguments for option \'--num_trimmed_reads\': \'$VIASH_PAR_NUM_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_TRIMMED_READS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_trimmed_reads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --num_trimmed_reads=*) + [ -n "$VIASH_PAR_NUM_TRIMMED_READS" ] && ViashError Bad arguments for option \'--num_trimmed_reads=*\': \'$VIASH_PAR_NUM_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_TRIMMED_READS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_qc) + [ -n "$VIASH_PAR_SKIP_QC" ] && ViashError Bad arguments for option \'--skip_qc\': \'$VIASH_PAR_SKIP_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_qc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_qc=*) + [ -n "$VIASH_PAR_SKIP_QC" ] && ViashError Bad arguments for option \'--skip_qc=*\': \'$VIASH_PAR_SKIP_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_fastqc) + [ -n "$VIASH_PAR_SKIP_FASTQC" ] && ViashError Bad arguments for option \'--skip_fastqc\': \'$VIASH_PAR_SKIP_FASTQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_FASTQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_fastqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_fastqc=*) + [ -n "$VIASH_PAR_SKIP_FASTQC" ] && ViashError Bad arguments for option \'--skip_fastqc=*\': \'$VIASH_PAR_SKIP_FASTQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_FASTQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --with_umi) + [ -n "$VIASH_PAR_WITH_UMI" ] && ViashError Bad arguments for option \'--with_umi\': \'$VIASH_PAR_WITH_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITH_UMI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --with_umi. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --with_umi=*) + [ -n "$VIASH_PAR_WITH_UMI" ] && ViashError Bad arguments for option \'--with_umi=*\': \'$VIASH_PAR_WITH_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITH_UMI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_umi_extract) + [ -n "$VIASH_PAR_SKIP_UMI_EXTRACT" ] && ViashError Bad arguments for option \'--skip_umi_extract\': \'$VIASH_PAR_SKIP_UMI_EXTRACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_UMI_EXTRACT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_umi_extract. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_umi_extract=*) + [ -n "$VIASH_PAR_SKIP_UMI_EXTRACT" ] && ViashError Bad arguments for option \'--skip_umi_extract=*\': \'$VIASH_PAR_SKIP_UMI_EXTRACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_UMI_EXTRACT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_extract_method) + [ -n "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ] && ViashError Bad arguments for option \'--umitools_extract_method\': \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_EXTRACT_METHOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_extract_method. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_extract_method=*) + [ -n "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ] && ViashError Bad arguments for option \'--umitools_extract_method=*\': \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_EXTRACT_METHOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_bc_pattern) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN" ] && ViashError Bad arguments for option \'--umitools_bc_pattern\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_bc_pattern. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_bc_pattern=*) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN" ] && ViashError Bad arguments for option \'--umitools_bc_pattern=*\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_bc_pattern2) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN2" ] && ViashError Bad arguments for option \'--umitools_bc_pattern2\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_bc_pattern2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_bc_pattern2=*) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN2" ] && ViashError Bad arguments for option \'--umitools_bc_pattern2=*\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umi_discard_read) + [ -n "$VIASH_PAR_UMI_DISCARD_READ" ] && ViashError Bad arguments for option \'--umi_discard_read\': \'$VIASH_PAR_UMI_DISCARD_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMI_DISCARD_READ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umi_discard_read. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umi_discard_read=*) + [ -n "$VIASH_PAR_UMI_DISCARD_READ" ] && ViashError Bad arguments for option \'--umi_discard_read=*\': \'$VIASH_PAR_UMI_DISCARD_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMI_DISCARD_READ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_umi_separator) + [ -n "$VIASH_PAR_UMITOOLS_UMI_SEPARATOR" ] && ViashError Bad arguments for option \'--umitools_umi_separator\': \'$VIASH_PAR_UMITOOLS_UMI_SEPARATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_UMI_SEPARATOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_umi_separator. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_umi_separator=*) + [ -n "$VIASH_PAR_UMITOOLS_UMI_SEPARATOR" ] && ViashError Bad arguments for option \'--umitools_umi_separator=*\': \'$VIASH_PAR_UMITOOLS_UMI_SEPARATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_UMI_SEPARATOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_grouping_method) + [ -n "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ] && ViashError Bad arguments for option \'--umitools_grouping_method\': \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_GROUPING_METHOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_grouping_method. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_grouping_method=*) + [ -n "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ] && ViashError Bad arguments for option \'--umitools_grouping_method=*\': \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_GROUPING_METHOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --save_umi_intermeds) + [ -n "$VIASH_PAR_SAVE_UMI_INTERMEDS" ] && ViashError Bad arguments for option \'--save_umi_intermeds\': \'$VIASH_PAR_SAVE_UMI_INTERMEDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAVE_UMI_INTERMEDS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --save_umi_intermeds. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --save_umi_intermeds=*) + [ -n "$VIASH_PAR_SAVE_UMI_INTERMEDS" ] && ViashError Bad arguments for option \'--save_umi_intermeds=*\': \'$VIASH_PAR_SAVE_UMI_INTERMEDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAVE_UMI_INTERMEDS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trimmer) + [ -n "$VIASH_PAR_TRIMMER" ] && ViashError Bad arguments for option \'--trimmer\': \'$VIASH_PAR_TRIMMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIMMER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trimmer=*) + [ -n "$VIASH_PAR_TRIMMER" ] && ViashError Bad arguments for option \'--trimmer=*\': \'$VIASH_PAR_TRIMMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIMMER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_trimmed_reads) + [ -n "$VIASH_PAR_MIN_TRIMMED_READS" ] && ViashError Bad arguments for option \'--min_trimmed_reads\': \'$VIASH_PAR_MIN_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_TRIMMED_READS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_trimmed_reads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_trimmed_reads=*) + [ -n "$VIASH_PAR_MIN_TRIMMED_READS" ] && ViashError Bad arguments for option \'--min_trimmed_reads=*\': \'$VIASH_PAR_MIN_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_TRIMMED_READS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_trimming) + [ -n "$VIASH_PAR_SKIP_TRIMMING" ] && ViashError Bad arguments for option \'--skip_trimming\': \'$VIASH_PAR_SKIP_TRIMMING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_TRIMMING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_trimming. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_trimming=*) + [ -n "$VIASH_PAR_SKIP_TRIMMING" ] && ViashError Bad arguments for option \'--skip_trimming=*\': \'$VIASH_PAR_SKIP_TRIMMING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_TRIMMING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --save_trimmed) + [ -n "$VIASH_PAR_SAVE_TRIMMED" ] && ViashError Bad arguments for option \'--save_trimmed\': \'$VIASH_PAR_SAVE_TRIMMED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAVE_TRIMMED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --save_trimmed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --save_trimmed=*) + [ -n "$VIASH_PAR_SAVE_TRIMMED" ] && ViashError Bad arguments for option \'--save_trimmed=*\': \'$VIASH_PAR_SAVE_TRIMMED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAVE_TRIMMED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_bbsplit) + [ -n "$VIASH_PAR_SKIP_BBSPLIT" ] && ViashError Bad arguments for option \'--skip_bbsplit\': \'$VIASH_PAR_SKIP_BBSPLIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BBSPLIT=true + shift 1 + ;; + --remove_ribo_rna) + [ -n "$VIASH_PAR_REMOVE_RIBO_RNA" ] && ViashError Bad arguments for option \'--remove_ribo_rna\': \'$VIASH_PAR_REMOVE_RIBO_RNA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REMOVE_RIBO_RNA=true + shift 1 + ;; + --qc_output1) + [ -n "$VIASH_PAR_QC_OUTPUT1" ] && ViashError Bad arguments for option \'--qc_output1\': \'$VIASH_PAR_QC_OUTPUT1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QC_OUTPUT1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qc_output1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qc_output1=*) + [ -n "$VIASH_PAR_QC_OUTPUT1" ] && ViashError Bad arguments for option \'--qc_output1=*\': \'$VIASH_PAR_QC_OUTPUT1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QC_OUTPUT1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qc_output2) + [ -n "$VIASH_PAR_QC_OUTPUT2" ] && ViashError Bad arguments for option \'--qc_output2\': \'$VIASH_PAR_QC_OUTPUT2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QC_OUTPUT2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qc_output2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qc_output2=*) + [ -n "$VIASH_PAR_QC_OUTPUT2" ] && ViashError Bad arguments for option \'--qc_output2=*\': \'$VIASH_PAR_QC_OUTPUT2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QC_OUTPUT2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_html_1) + [ -n "$VIASH_PAR_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--fastqc_html_1\': \'$VIASH_PAR_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_html_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_html_1=*) + [ -n "$VIASH_PAR_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--fastqc_html_1=*\': \'$VIASH_PAR_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_html_2) + [ -n "$VIASH_PAR_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--fastqc_html_2\': \'$VIASH_PAR_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_html_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_html_2=*) + [ -n "$VIASH_PAR_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--fastqc_html_2=*\': \'$VIASH_PAR_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_zip_1) + [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_zip_1=*) + [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1=*\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_zip_2) + [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_zip_2=*) + [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2=*\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_log_1) + [ -n "$VIASH_PAR_TRIM_LOG_1" ] && ViashError Bad arguments for option \'--trim_log_1\': \'$VIASH_PAR_TRIM_LOG_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_log_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_log_1=*) + [ -n "$VIASH_PAR_TRIM_LOG_1" ] && ViashError Bad arguments for option \'--trim_log_1=*\': \'$VIASH_PAR_TRIM_LOG_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_log_2) + [ -n "$VIASH_PAR_TRIM_LOG_2" ] && ViashError Bad arguments for option \'--trim_log_2\': \'$VIASH_PAR_TRIM_LOG_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_log_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_log_2=*) + [ -n "$VIASH_PAR_TRIM_LOG_2" ] && ViashError Bad arguments for option \'--trim_log_2=*\': \'$VIASH_PAR_TRIM_LOG_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_html_1) + [ -n "$VIASH_PAR_TRIM_HTML_1" ] && ViashError Bad arguments for option \'--trim_html_1\': \'$VIASH_PAR_TRIM_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_html_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_html_1=*) + [ -n "$VIASH_PAR_TRIM_HTML_1" ] && ViashError Bad arguments for option \'--trim_html_1=*\': \'$VIASH_PAR_TRIM_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_html_2) + [ -n "$VIASH_PAR_TRIM_HTML_2" ] && ViashError Bad arguments for option \'--trim_html_2\': \'$VIASH_PAR_TRIM_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_html_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_html_2=*) + [ -n "$VIASH_PAR_TRIM_HTML_2" ] && ViashError Bad arguments for option \'--trim_html_2=*\': \'$VIASH_PAR_TRIM_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_zip_1) + [ -n "$VIASH_PAR_TRIM_ZIP_1" ] && ViashError Bad arguments for option \'--trim_zip_1\': \'$VIASH_PAR_TRIM_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_zip_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_zip_1=*) + [ -n "$VIASH_PAR_TRIM_ZIP_1" ] && ViashError Bad arguments for option \'--trim_zip_1=*\': \'$VIASH_PAR_TRIM_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_zip_2) + [ -n "$VIASH_PAR_TRIM_ZIP_2" ] && ViashError Bad arguments for option \'--trim_zip_2\': \'$VIASH_PAR_TRIM_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_zip_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_zip_2=*) + [ -n "$VIASH_PAR_TRIM_ZIP_2" ] && ViashError Bad arguments for option \'--trim_zip_2=*\': \'$VIASH_PAR_TRIM_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sortmerna_log) + [ -n "$VIASH_PAR_SORTMERNA_LOG" ] && ViashError Bad arguments for option \'--sortmerna_log\': \'$VIASH_PAR_SORTMERNA_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_LOG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sortmerna_log. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sortmerna_log=*) + [ -n "$VIASH_PAR_SORTMERNA_LOG" ] && ViashError Bad arguments for option \'--sortmerna_log=*\': \'$VIASH_PAR_SORTMERNA_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_LOG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_quant_output) + [ -n "$VIASH_PAR_SALMON_QUANT_OUTPUT" ] && ViashError Bad arguments for option \'--salmon_quant_output\': \'$VIASH_PAR_SALMON_QUANT_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_quant_output=*) + [ -n "$VIASH_PAR_SALMON_QUANT_OUTPUT" ] && ViashError Bad arguments for option \'--salmon_quant_output=*\': \'$VIASH_PAR_SALMON_QUANT_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_json) + [ -n "$VIASH_PAR_TRIM_JSON" ] && ViashError Bad arguments for option \'--trim_json\': \'$VIASH_PAR_TRIM_JSON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_JSON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_json. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_json=*) + [ -n "$VIASH_PAR_TRIM_JSON" ] && ViashError Bad arguments for option \'--trim_json=*\': \'$VIASH_PAR_TRIM_JSON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_JSON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_html) + [ -n "$VIASH_PAR_TRIM_HTML" ] && ViashError Bad arguments for option \'--trim_html\': \'$VIASH_PAR_TRIM_HTML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_html. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_html=*) + [ -n "$VIASH_PAR_TRIM_HTML" ] && ViashError Bad arguments for option \'--trim_html=*\': \'$VIASH_PAR_TRIM_HTML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML=$(ViashRemoveFlags "$1") + shift 1 + ;; + --merged_out) + [ -n "$VIASH_PAR_MERGED_OUT" ] && ViashError Bad arguments for option \'--merged_out\': \'$VIASH_PAR_MERGED_OUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_OUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --merged_out. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --merged_out=*) + [ -n "$VIASH_PAR_MERGED_OUT" ] && ViashError Bad arguments for option \'--merged_out=*\': \'$VIASH_PAR_MERGED_OUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MERGED_OUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_ID+x} ]; then + ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then + ViashError '--fastq_1' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_STRANDEDNESS+x} ]; then + VIASH_PAR_STRANDEDNESS="auto" +fi +if [ -z ${VIASH_PAR_SKIP_FASTQC+x} ]; then + VIASH_PAR_SKIP_FASTQC="false" +fi +if [ -z ${VIASH_PAR_WITH_UMI+x} ]; then + VIASH_PAR_WITH_UMI="false" +fi +if [ -z ${VIASH_PAR_SKIP_UMI_EXTRACT+x} ]; then + VIASH_PAR_SKIP_UMI_EXTRACT="false" +fi +if [ -z ${VIASH_PAR_UMITOOLS_EXTRACT_METHOD+x} ]; then + VIASH_PAR_UMITOOLS_EXTRACT_METHOD="string" +fi +if [ -z ${VIASH_PAR_UMITOOLS_BC_PATTERN+x} ]; then + VIASH_PAR_UMITOOLS_BC_PATTERN="" +fi +if [ -z ${VIASH_PAR_UMITOOLS_BC_PATTERN2+x} ]; then + VIASH_PAR_UMITOOLS_BC_PATTERN2="" +fi +if [ -z ${VIASH_PAR_UMI_DISCARD_READ+x} ]; then + VIASH_PAR_UMI_DISCARD_READ="0" +fi +if [ -z ${VIASH_PAR_UMITOOLS_UMI_SEPARATOR+x} ]; then + VIASH_PAR_UMITOOLS_UMI_SEPARATOR="_" +fi +if [ -z ${VIASH_PAR_UMITOOLS_GROUPING_METHOD+x} ]; then + VIASH_PAR_UMITOOLS_GROUPING_METHOD="directional" +fi +if [ -z ${VIASH_PAR_SAVE_UMI_INTERMEDS+x} ]; then + VIASH_PAR_SAVE_UMI_INTERMEDS="false" +fi +if [ -z ${VIASH_PAR_TRIMMER+x} ]; then + VIASH_PAR_TRIMMER="trimgalore" +fi +if [ -z ${VIASH_PAR_MIN_TRIMMED_READS+x} ]; then + VIASH_PAR_MIN_TRIMMED_READS="10000" +fi +if [ -z ${VIASH_PAR_SKIP_TRIMMING+x} ]; then + VIASH_PAR_SKIP_TRIMMING="false" +fi +if [ -z ${VIASH_PAR_SAVE_TRIMMED+x} ]; then + VIASH_PAR_SAVE_TRIMMED="false" +fi +if [ -z ${VIASH_PAR_SKIP_BBSPLIT+x} ]; then + VIASH_PAR_SKIP_BBSPLIT="false" +fi +if [ -z ${VIASH_PAR_REMOVE_RIBO_RNA+x} ]; then + VIASH_PAR_REMOVE_RIBO_RNA="false" +fi +if [ -z ${VIASH_PAR_QC_OUTPUT1+x} ]; then + VIASH_PAR_QC_OUTPUT1="\${id}_r1.fastq.gz" +fi +if [ -z ${VIASH_PAR_QC_OUTPUT2+x} ]; then + VIASH_PAR_QC_OUTPUT2="\${id}_r2.fastq.gz" +fi +if [ -z ${VIASH_PAR_FASTQC_HTML_1+x} ]; then + VIASH_PAR_FASTQC_HTML_1="\${id}_r1.fastqc.html" +fi +if [ -z ${VIASH_PAR_FASTQC_HTML_2+x} ]; then + VIASH_PAR_FASTQC_HTML_2="\${id}_r2.fastqc.html" +fi +if [ -z ${VIASH_PAR_FASTQC_ZIP_1+x} ]; then + VIASH_PAR_FASTQC_ZIP_1="\${id}_r1.fastqc.zip" +fi +if [ -z ${VIASH_PAR_FASTQC_ZIP_2+x} ]; then + VIASH_PAR_FASTQC_ZIP_2="\${id}_r2.fastqc.zip" +fi +if [ -z ${VIASH_PAR_TRIM_LOG_1+x} ]; then + VIASH_PAR_TRIM_LOG_1="\${id}_r1.trimming_report.txt" +fi +if [ -z ${VIASH_PAR_TRIM_LOG_2+x} ]; then + VIASH_PAR_TRIM_LOG_2="\${id}_r2.trimming_report.txt" +fi +if [ -z ${VIASH_PAR_TRIM_HTML_1+x} ]; then + VIASH_PAR_TRIM_HTML_1="\${id}_r1.trimmed_fastqc.html" +fi +if [ -z ${VIASH_PAR_TRIM_HTML_2+x} ]; then + VIASH_PAR_TRIM_HTML_2="\${id}_r2.trimmed_fastqc.html" +fi +if [ -z ${VIASH_PAR_TRIM_ZIP_1+x} ]; then + VIASH_PAR_TRIM_ZIP_1="\${id}_r1.trimmed_fastqc.zip" +fi +if [ -z ${VIASH_PAR_TRIM_ZIP_2+x} ]; then + VIASH_PAR_TRIM_ZIP_2="\${id}_r2.trimmed_fastqc.zip" +fi +if [ -z ${VIASH_PAR_SORTMERNA_LOG+x} ]; then + VIASH_PAR_SORTMERNA_LOG="\$id.sortmerna.log" +fi +if [ -z ${VIASH_PAR_SALMON_QUANT_OUTPUT+x} ]; then + VIASH_PAR_SALMON_QUANT_OUTPUT="\$id.salmon_quant_output" +fi +if [ -z ${VIASH_PAR_TRIM_JSON+x} ]; then + VIASH_PAR_TRIM_JSON="\$id.fastp_out.json" +fi +if [ -z ${VIASH_PAR_TRIM_HTML+x} ]; then + VIASH_PAR_TRIM_HTML="\$id.fastp_out.html" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -e "$VIASH_PAR_FASTQ_1" ]; then + ViashError "Input file '$VIASH_PAR_FASTQ_1' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ] && [ ! -e "$VIASH_PAR_BBSPLIT_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_BBSPLIT_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && [ ! -e "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then + ViashError "Input file '$VIASH_PAR_RIBO_DATABASE_MANIFEST' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_INDEX" ] && [ ! -e "$VIASH_PAR_SALMON_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_SALMON_INDEX' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_NUM_TRIMMED_READS" ]]; then + if ! [[ "$VIASH_PAR_NUM_TRIMMED_READS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--num_trimmed_reads' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_QC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_qc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_FASTQC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_FASTQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_fastqc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WITH_UMI" ]]; then + if ! [[ "$VIASH_PAR_WITH_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--with_umi' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_UMI_EXTRACT" ]]; then + if ! [[ "$VIASH_PAR_SKIP_UMI_EXTRACT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_umi_extract' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_UMI_DISCARD_READ" ]]; then + if ! [[ "$VIASH_PAR_UMI_DISCARD_READ" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--umi_discard_read' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAVE_UMI_INTERMEDS" ]]; then + if ! [[ "$VIASH_PAR_SAVE_UMI_INTERMEDS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--save_umi_intermeds' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_TRIMMED_READS" ]]; then + if ! [[ "$VIASH_PAR_MIN_TRIMMED_READS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_trimmed_reads' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_TRIMMING" ]]; then + if ! [[ "$VIASH_PAR_SKIP_TRIMMING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_trimming' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAVE_TRIMMED" ]]; then + if ! [[ "$VIASH_PAR_SAVE_TRIMMED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--save_trimmed' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_BBSPLIT" ]]; then + if ! [[ "$VIASH_PAR_SKIP_BBSPLIT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_bbsplit' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_REMOVE_RIBO_RNA" ]]; then + if ! [[ "$VIASH_PAR_REMOVE_RIBO_RNA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--remove_ribo_rna' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ]; then + VIASH_PAR_UMITOOLS_EXTRACT_METHOD_CHOICES=("string;regex") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_UMITOOLS_EXTRACT_METHOD_CHOICES[*]};" =~ ";$VIASH_PAR_UMITOOLS_EXTRACT_METHOD;" ]]; then + ViashError '--umitools_extract_method' specified value of \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_UMI_DISCARD_READ" ]; then + VIASH_PAR_UMI_DISCARD_READ_CHOICES=("0;1;2") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_UMI_DISCARD_READ_CHOICES[*]};" =~ ";$VIASH_PAR_UMI_DISCARD_READ;" ]]; then + ViashError '--umi_discard_read' specified value of \'$VIASH_PAR_UMI_DISCARD_READ\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ]; then + VIASH_PAR_UMITOOLS_GROUPING_METHOD_CHOICES=("unique;percentile;cluster;adjacency;directional") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_UMITOOLS_GROUPING_METHOD_CHOICES[*]};" =~ ";$VIASH_PAR_UMITOOLS_GROUPING_METHOD;" ]]; then + ViashError '--umitools_grouping_method' specified value of \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_TRIMMER" ]; then + VIASH_PAR_TRIMMER_CHOICES=("trimgalore;fastp") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_TRIMMER_CHOICES[*]};" =~ ";$VIASH_PAR_TRIMMER;" ]]; then + ViashError '--trimmer' specified value of \'$VIASH_PAR_TRIMMER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_QC_OUTPUT1" ] && [ ! -d "$(dirname "$VIASH_PAR_QC_OUTPUT1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QC_OUTPUT1")" +fi +if [ ! -z "$VIASH_PAR_QC_OUTPUT2" ] && [ ! -d "$(dirname "$VIASH_PAR_QC_OUTPUT2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QC_OUTPUT2")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_HTML_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_HTML_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_HTML_1")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_HTML_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_HTML_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_HTML_2")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_ZIP_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_ZIP_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_ZIP_1")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_ZIP_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_ZIP_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_ZIP_2")" +fi +if [ ! -z "$VIASH_PAR_TRIM_LOG_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_LOG_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_LOG_1")" +fi +if [ ! -z "$VIASH_PAR_TRIM_LOG_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_LOG_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_LOG_2")" +fi +if [ ! -z "$VIASH_PAR_TRIM_HTML_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_HTML_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_HTML_1")" +fi +if [ ! -z "$VIASH_PAR_TRIM_HTML_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_HTML_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_HTML_2")" +fi +if [ ! -z "$VIASH_PAR_TRIM_ZIP_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_ZIP_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_ZIP_1")" +fi +if [ ! -z "$VIASH_PAR_TRIM_ZIP_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_ZIP_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_ZIP_2")" +fi +if [ ! -z "$VIASH_PAR_SORTMERNA_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_SORTMERNA_LOG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SORTMERNA_LOG")" +fi +if [ ! -z "$VIASH_PAR_SALMON_QUANT_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_SALMON_QUANT_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SALMON_QUANT_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_TRIM_JSON" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_JSON")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_JSON")" +fi +if [ ! -z "$VIASH_PAR_TRIM_HTML" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_HTML")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_HTML")" +fi +if [ ! -z "$VIASH_PAR_MERGED_OUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MERGED_OUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MERGED_OUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_FASTQC="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf" +VIASH_DEP_UMI_TOOLS_UMI_TOOLS_EXTRACT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/main.nf" +VIASH_DEP_TRIMGALORE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/main.nf" +VIASH_DEP_BBMAP_BBMAP_BBSPLIT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf" +VIASH_DEP_SORTMERNA="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/main.nf" +VIASH_DEP_FASTP="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/main.nf" +VIASH_DEP_FQ_SUBSAMPLE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/main.nf" +VIASH_DEP_SALMON_SALMON_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-pre_processing-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +workflow run_wf { + + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [paired: paired, input: input] ] + } + + | fastqc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_fastqc }, + fromState: [ "input": "input" ], + toState: {id, output_state, state -> + def newKeys = [ + "fastqc_html_1":output_state["html"][0], + "fastqc_html_2": output_state["html"][1], + "fastqc_zip_1": output_state["zip"][0], + "fastqc_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + // Extract UMIs from fastq files and discard read 1 or read 2 if required + | umi_tools_extract.run ( + runIf: { id, state -> state.with_umi && !state.skip_umi_extract }, + fromState: { id, state -> + def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2) + def output = "\${id}.r1.fastq.gz" + def read2_out = state.paired ? "\${id}.r2.fastq.gz" : state.remove(state.fastq_2) + [ input: state.fastq_1, + read2_in: state.fastq_2, + bc_pattern: state.umitools_bc_pattern, + bc_pattern2: bc_pattern2, + extract_method: state.umitools_extract_method, + umi_separator: state.umitools_umi_separator, + grouping_method: state.umitools_grouping_method, + output: output, + read2_out: read2_out ] + }, + toState: [ + "fastq_1": "output", + "fastq_2": "read2_out" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Discard read if required + | map { id, state -> + def paired = state.paired + def fastq_1 = state.fastq_1 + def fastq_2 = state.fastq_2 + if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) { + if (state.umi_discard_read == 1) { + fastq_1 = fastq_2 + } + fastq_2 = state.remove(state.fastq_2) + paired = false + } + [ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ] + } + + // Trim reads using Trim galore! + | trimgalore.run ( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "trimgalore" }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ paired: state.paired, + input: input, + min_trimmed_reads: state.min_trimmed_reads, + trimmed_r1: state.qc_output1, + trimmed_r2: state.qc_output2 ] + }, + args: [gzip: true, fastqc: true], + toState: [ + "fastq_1": "trimmed_r1", + "fastq_2": "trimmed_r2", + "trim_log_1": "trimming_report_r1", + "trim_log_2": "trimming_report_r2", + "trim_zip_1": "trimmed_fastqc_zip_1", + "trim_zip_2": "trimmed_fastqc_zip_2", + "trim_html_1": "trimmed_fastqc_html_1", + "trim_html_2": "trimmed_fastqc_html_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Trim reads using fastp + | fastp.run( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, + fromState: { id, state -> + def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + [ in1: state.fastq_1, + in2: state.fastq_2, + merge: state.fastp_save_merged, + interleaved_in: state.interleaved_reads, + detect_adapter_for_pe: state.paired, + adapter_fasta: state.fastp_adapter_fasta ] + outputState + }, + toState: [ + "fastq_1": "out1", + "fastq_2": "out2", + "failed_trim": "failed_out", + "failed_trim_unpaired1": "unpaired1", + "failed_trim_unpaired2": "unpaired2", + "trim_json": "json", + "trim_html": "html", + "trim_merged_out": "merged_out" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Perform FASTQC on reads trimmed using fastp + | fastqc.run ( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ input: input ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trim_html_1":output_state["html"][0], + "trim_html_2": output_state["html"][1], + "trim_zip_1": output_state["zip"][0], + "trim_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], + key: "fastqc_trimming", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Filter out contaminant RNA + | bbmap_bbsplit.run ( + runIf: { id, state -> !state.skip_bbsplit }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ paired: state.paired, + input: input, + build: state.bbsplit_index ] + }, + args: ["only_build_index": false], + toState: [ + "fastq_1": "fastq_1", + "fastq_2": "fastq_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Sort reads by rRNA and non-rRNA + | sortmerna.run ( + runIf: { id, state -> state.remove_ribo_rna }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def filePaths = state.ribo_database_manifest.readLines() + def refs = filePaths.collect { it } + def other = "\${id}_non_rRNA_reads/" + [ paired_in: state.paired, + input: input, + ref: refs, + out2: state.paired, + other: other ] + }, + args: [fastx: true, num_alignments: 1], + toState: { id, output_state, state -> + def newKeys = [ + "sortmerna_output": output_state["other"], + "sortmerna_log": output_state["log"] + ] + def new_state = state + newKeys + return new_state + }, + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | map { id, state -> + if (state.remove_ribo_rna) { + def fastq_1 = state.sortmerna_output.listFiles().find{it.name == "other_fwd.fq.gz"} + def fastq_2 = state.sortmerna_output.listFiles().find{it.name == "other_rev.fq.gz"} + [ id, state + [fastq_1: fastq_1, fastq_2: fastq_2] ] + } else { + [ id, state ] + } + } + + // Sub-sample FastQ files and pseudo-align with Salmon to auto-infer strandedness + | fq_subsample.run ( + runIf: { id, state -> state.strandedness == 'auto' }, + fromState: { id, state -> + def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + }, + args: [ + record_count: 1000, + seed: 1 + ], + toState: [ + "subsampled_fastq_1": "output_1", + "subsampled_fastq_2": "output_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + | salmon_quant.run ( + runIf: { id, state -> state.strandedness == 'auto' }, + fromState: { id, state -> + def unmated_reads = !state.paired ? state.subsampled_fastq_1 : null + def mates1 = state.paired ? state.subsampled_fastq_1 : null + def mates2 = state.paired ? state.subsampled_fastq_2 : null + [ unmated_reads: unmated_reads, + mates1: mates1, + mates2: mates2, + gene_map: state.gtf, + index: state.salmon_index, + lib_type: state.lib_type ] + }, + args: [ "skip_quant": true ], + toState: [ "salmon_quant_output": "output" ], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = (!state.paired) ? + [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : + [] + [ id, state + mod_state ] + } + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "qc_output1": "fastq_1", + "qc_output2": "fastq_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "sortmerna_log": "sortmerna_log", + "trim_json": "trim_json", + "trim_html": "trim_html", + "trim_merged_out": "trim_merged_out", + "salmon_quant_output": "salmon_quant_output" + ) + + emit: + output_ch +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_SALMON_QUANT_OUTPUT" ] && [ ! -e "$VIASH_PAR_SALMON_QUANT_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_SALMON_QUANT_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRIM_JSON" ] && [ ! -e "$VIASH_PAR_TRIM_JSON" ]; then + ViashError "Output file '$VIASH_PAR_TRIM_JSON' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRIM_HTML" ] && [ ! -e "$VIASH_PAR_TRIM_HTML" ]; then + ViashError "Output file '$VIASH_PAR_TRIM_HTML' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MERGED_OUT" ] && [ ! -e "$VIASH_PAR_MERGED_OUT" ]; then + ViashError "Output file '$VIASH_PAR_MERGED_OUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/workflows/prepare_genome/.config.vsh.yaml b/target/executable/workflows/prepare_genome/.config.vsh.yaml new file mode 100644 index 0000000..a2e34ab --- /dev/null +++ b/target/executable/workflows/prepare_genome/.config.vsh.yaml @@ -0,0 +1,556 @@ +name: "prepare_genome" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file. This parameter is *mandatory* if --genome\ + \ is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gff" + description: "Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--additional_fasta" + description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\ + \ sequences." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Path to FASTA transcriptome file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed" + description: "Path to BED file containing gene intervals. This will be created\ + \ from the GTF file if not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--splicesites" + description: "Splice sites file required for HISAT2." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_bbsplit" + description: "Skip BBSplit for removal of non-reference genome reads." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_fasta_list" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--star_index" + description: "Path to directory or tar.gz archive for pre-built STAR index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--star_sjdb_gtf_feature_exon" + description: "Feature type in GTF file to be used as exons for building transcripts" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index" + description: "Path to directory or tar.gz archive for pre-built RSEM index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory or tar.gz archive for pre-built Salmon index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index" + description: "Path to directory or tar.gz archive for pre-built Kallisto index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index" + description: "Path to directory or tar.gz archive for pre-built BBSplit index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--pseudo_aligner_kmer_size" + description: "Kmer length passed to indexing step of pseudoaligners." + info: null + default: + - 31 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--gencode" + description: "Specify if the GTF annotation is in GENCODE format." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--biotype" + description: "Biotype value to use while appending entries to GTF file when additional\ + \ fasta file is provided." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--filter_gtf" + description: "Whether to filter the GTF or not?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--aligner" + description: "Specifies the alignment algorithm to use - available options are\ + \ 'star_salmon', 'star_rsem' and 'hisat2'." + info: null + default: + - "star_salmon" + required: false + choices: + - "star_salmon" + - "star_rsem" + - "hisat2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Specifies the pseudo aligner to use - available options are 'salmon'.\ + \ Runs in addition to '--aligner'." + info: null + default: + - "salmon" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_alignment" + description: "Skip all of the alignment-based processes within the pipeline." + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--fasta_uncompressed" + info: null + default: + - "reference_genome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf_uncompressed" + info: null + default: + - "gene_annotation.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta_uncompressed" + info: null + default: + - "transcriptome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed_uncompressed" + info: null + default: + - "gene_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_index_uncompressed" + description: "Path to STAR index." + info: null + default: + - "STAR_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index_uncompressed" + description: "Path to directory or tar.gz archive for pre-built RSEM index." + info: null + default: + - "RSEM_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index_uncompressed" + description: "Path to Salmon index." + info: null + default: + - "Salmon_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index_uncompressed" + description: "Path to Kallisto index." + info: null + default: + - "Kallisto_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index_uncompressed" + description: "Path to BBSplit index." + info: null + default: + - "BBSplit_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chrom_sizes" + description: "File containing chromosome lengths" + info: null + default: + - "reference_genome.fasta.sizes" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "FASTA index file" + info: null + default: + - "reference_genome.fasta.fai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A subworkflow for preparing all the required genome references\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "gunzip" + repository: + type: "local" +- name: "gffread" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "cat_additional_fasta" + repository: + type: "local" +- name: "gtf2bed" + repository: + type: "local" +- name: "preprocess_transcripts_fasta" + repository: + type: "local" +- name: "gtf_filter" + repository: + type: "local" +- name: "rsem/rsem_prepare_reference" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "getchromsizes" + repository: + type: "local" +- name: "untar" + repository: + type: "vsh" + repo: "craftbox" + tag: "v0.1.0" +- name: "star/star_genome_generate" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "bbmap/bbmap_bbsplit" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "salmon/salmon_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "kallisto/kallisto_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/prepare_genome/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/prepare_genome" + executable: "target/executable/workflows/prepare_genome/prepare_genome" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/gunzip" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread" + - "target/nextflow/cat_additional_fasta" + - "target/nextflow/gtf2bed" + - "target/nextflow/preprocess_transcripts_fasta" + - "target/nextflow/gtf_filter" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference" + - "target/nextflow/getchromsizes" + - "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/prepare_genome/nextflow_labels.config b/target/executable/workflows/prepare_genome/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/prepare_genome/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/prepare_genome/prepare_genome b/target/executable/workflows/prepare_genome/prepare_genome new file mode 100755 index 0000000..e027359 --- /dev/null +++ b/target/executable/workflows/prepare_genome/prepare_genome @@ -0,0 +1,1550 @@ +#!/usr/bin/env bash + +# prepare_genome v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="prepare_genome" +VIASH_META_FUNCTIONALITY_NAME="prepare_genome" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "prepare_genome v0.3.0" + echo "" + echo "A subworkflow for preparing all the required genome references" + echo "" + echo "Input:" + echo " --fasta" + echo " type: file, required parameter, file must exist" + echo " Path to FASTA genome file." + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " Path to GTF annotation file. This parameter is *mandatory* if --genome" + echo " is not specified." + echo "" + echo " --gff" + echo " type: file, file must exist" + echo " Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + echo "" + echo " --additional_fasta" + echo " type: file, file must exist" + echo " FASTA file to concatenate to genome FASTA file e.g. containing spike-in" + echo " sequences." + echo "" + echo " --transcript_fasta" + echo " type: file, file must exist" + echo " Path to FASTA transcriptome file." + echo "" + echo " --gene_bed" + echo " type: file, file must exist" + echo " Path to BED file containing gene intervals. This will be created from" + echo " the GTF file if not specified." + echo "" + echo " --splicesites" + echo " type: file, file must exist" + echo " Splice sites file required for HISAT2." + echo "" + echo " --skip_bbsplit" + echo " type: boolean" + echo " Skip BBSplit for removal of non-reference genome reads." + echo "" + echo " --bbsplit_fasta_list" + echo " type: file, multiple values allowed, file must exist" + echo " List of reference genomes (separated by \";\") to filter reads against" + echo " with BBSplit." + echo "" + echo " --star_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built STAR index." + echo "" + echo " --star_sjdb_gtf_feature_exon" + echo " type: string" + echo " Feature type in GTF file to be used as exons for building transcripts" + echo "" + echo " --rsem_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built RSEM index." + echo "" + echo " --salmon_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built Salmon index." + echo "" + echo " --kallisto_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built Kallisto index." + echo "" + echo " --bbsplit_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built BBSplit index." + echo "" + echo " --pseudo_aligner_kmer_size" + echo " type: integer" + echo " default: 31" + echo " Kmer length passed to indexing step of pseudoaligners." + echo "" + echo " --gencode" + echo " type: boolean" + echo " Specify if the GTF annotation is in GENCODE format." + echo "" + echo " --biotype" + echo " type: string" + echo " Biotype value to use while appending entries to GTF file when additional" + echo " fasta file is provided." + echo "" + echo " --filter_gtf" + echo " type: boolean" + echo " Whether to filter the GTF or not?" + echo "" + echo " --aligner" + echo " type: string" + echo " default: star_salmon" + echo " choices: [ star_salmon, star_rsem, hisat2 ]" + echo " Specifies the alignment algorithm to use - available options are" + echo " 'star_salmon', 'star_rsem' and 'hisat2'." + echo "" + echo " --pseudo_aligner" + echo " type: string" + echo " default: salmon" + echo " choices: [ salmon, kallisto ]" + echo " Specifies the pseudo aligner to use - available options are 'salmon'." + echo " Runs in addition to '--aligner'." + echo "" + echo " --skip_alignment" + echo " type: boolean_true" + echo " Skip all of the alignment-based processes within the pipeline." + echo "" + echo "Output:" + echo " --fasta_uncompressed" + echo " type: file, output, file must exist" + echo " default: reference_genome.fasta" + echo "" + echo " --gtf_uncompressed" + echo " type: file, output, file must exist" + echo " default: gene_annotation.gtf" + echo "" + echo " --transcript_fasta_uncompressed" + echo " type: file, output, file must exist" + echo " default: transcriptome.fasta" + echo "" + echo " --gene_bed_uncompressed" + echo " type: file, output, file must exist" + echo " default: gene_annotation.bed" + echo "" + echo " --star_index_uncompressed" + echo " type: file, output, file must exist" + echo " default: STAR_index" + echo " Path to STAR index." + echo "" + echo " --rsem_index_uncompressed" + echo " type: file, output, file must exist" + echo " default: RSEM_index" + echo " Path to directory or tar.gz archive for pre-built RSEM index." + echo "" + echo " --salmon_index_uncompressed" + echo " type: file, output, file must exist" + echo " default: Salmon_index" + echo " Path to Salmon index." + echo "" + echo " --kallisto_index_uncompressed" + echo " type: file, output, file must exist" + echo " default: Kallisto_index" + echo " Path to Kallisto index." + echo "" + echo " --bbsplit_index_uncompressed" + echo " type: file, output, file must exist" + echo " default: BBSplit_index" + echo " Path to BBSplit index." + echo "" + echo " --chrom_sizes" + echo " type: file, output, file must exist" + echo " default: reference_genome.fasta.sizes" + echo " File containing chromosome lengths" + echo "" + echo " --fai" + echo " type: file, output, file must exist" + echo " default: reference_genome.fasta.fai" + echo " FASTA index file" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "prepare_genome v0.3.0" + exit + ;; + --fasta) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta=*) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta=*\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gff) + [ -n "$VIASH_PAR_GFF" ] && ViashError Bad arguments for option \'--gff\': \'$VIASH_PAR_GFF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GFF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gff. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gff=*) + [ -n "$VIASH_PAR_GFF" ] && ViashError Bad arguments for option \'--gff=*\': \'$VIASH_PAR_GFF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GFF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --additional_fasta) + [ -n "$VIASH_PAR_ADDITIONAL_FASTA" ] && ViashError Bad arguments for option \'--additional_fasta\': \'$VIASH_PAR_ADDITIONAL_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADDITIONAL_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --additional_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --additional_fasta=*) + [ -n "$VIASH_PAR_ADDITIONAL_FASTA" ] && ViashError Bad arguments for option \'--additional_fasta=*\': \'$VIASH_PAR_ADDITIONAL_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADDITIONAL_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcript_fasta) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_fasta=*) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta=*\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gene_bed) + [ -n "$VIASH_PAR_GENE_BED" ] && ViashError Bad arguments for option \'--gene_bed\': \'$VIASH_PAR_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_bed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gene_bed=*) + [ -n "$VIASH_PAR_GENE_BED" ] && ViashError Bad arguments for option \'--gene_bed=*\': \'$VIASH_PAR_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --splicesites) + [ -n "$VIASH_PAR_SPLICESITES" ] && ViashError Bad arguments for option \'--splicesites\': \'$VIASH_PAR_SPLICESITES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPLICESITES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --splicesites. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --splicesites=*) + [ -n "$VIASH_PAR_SPLICESITES" ] && ViashError Bad arguments for option \'--splicesites=*\': \'$VIASH_PAR_SPLICESITES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPLICESITES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_bbsplit) + [ -n "$VIASH_PAR_SKIP_BBSPLIT" ] && ViashError Bad arguments for option \'--skip_bbsplit\': \'$VIASH_PAR_SKIP_BBSPLIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BBSPLIT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_bbsplit. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_bbsplit=*) + [ -n "$VIASH_PAR_SKIP_BBSPLIT" ] && ViashError Bad arguments for option \'--skip_bbsplit=*\': \'$VIASH_PAR_SKIP_BBSPLIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BBSPLIT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bbsplit_fasta_list) + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST="$2" + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_fasta_list. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bbsplit_fasta_list=*) + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --star_index) + [ -n "$VIASH_PAR_STAR_INDEX" ] && ViashError Bad arguments for option \'--star_index\': \'$VIASH_PAR_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_index=*) + [ -n "$VIASH_PAR_STAR_INDEX" ] && ViashError Bad arguments for option \'--star_index=*\': \'$VIASH_PAR_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_sjdb_gtf_feature_exon) + [ -n "$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON" ] && ViashError Bad arguments for option \'--star_sjdb_gtf_feature_exon\': \'$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_sjdb_gtf_feature_exon. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_sjdb_gtf_feature_exon=*) + [ -n "$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON" ] && ViashError Bad arguments for option \'--star_sjdb_gtf_feature_exon=*\': \'$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_index) + [ -n "$VIASH_PAR_RSEM_INDEX" ] && ViashError Bad arguments for option \'--rsem_index\': \'$VIASH_PAR_RSEM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_index=*) + [ -n "$VIASH_PAR_RSEM_INDEX" ] && ViashError Bad arguments for option \'--rsem_index=*\': \'$VIASH_PAR_RSEM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_index) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_index=*) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index=*\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_index) + [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_index=*) + [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index=*\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bbsplit_index) + [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bbsplit_index=*) + [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index=*\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner_kmer_size) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ] && ViashError Bad arguments for option \'--pseudo_aligner_kmer_size\': \'$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner_kmer_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner_kmer_size=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ] && ViashError Bad arguments for option \'--pseudo_aligner_kmer_size=*\': \'$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gencode) + [ -n "$VIASH_PAR_GENCODE" ] && ViashError Bad arguments for option \'--gencode\': \'$VIASH_PAR_GENCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENCODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gencode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gencode=*) + [ -n "$VIASH_PAR_GENCODE" ] && ViashError Bad arguments for option \'--gencode=*\': \'$VIASH_PAR_GENCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENCODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --biotype) + [ -n "$VIASH_PAR_BIOTYPE" ] && ViashError Bad arguments for option \'--biotype\': \'$VIASH_PAR_BIOTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --biotype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --biotype=*) + [ -n "$VIASH_PAR_BIOTYPE" ] && ViashError Bad arguments for option \'--biotype=*\': \'$VIASH_PAR_BIOTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --filter_gtf) + [ -n "$VIASH_PAR_FILTER_GTF" ] && ViashError Bad arguments for option \'--filter_gtf\': \'$VIASH_PAR_FILTER_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FILTER_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --filter_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --filter_gtf=*) + [ -n "$VIASH_PAR_FILTER_GTF" ] && ViashError Bad arguments for option \'--filter_gtf=*\': \'$VIASH_PAR_FILTER_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FILTER_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --aligner) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --aligner=*) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner=*\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner=*\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_alignment) + [ -n "$VIASH_PAR_SKIP_ALIGNMENT" ] && ViashError Bad arguments for option \'--skip_alignment\': \'$VIASH_PAR_SKIP_ALIGNMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_ALIGNMENT=true + shift 1 + ;; + --fasta_uncompressed) + [ -n "$VIASH_PAR_FASTA_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--fasta_uncompressed\': \'$VIASH_PAR_FASTA_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta_uncompressed=*) + [ -n "$VIASH_PAR_FASTA_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--fasta_uncompressed=*\': \'$VIASH_PAR_FASTA_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_uncompressed) + [ -n "$VIASH_PAR_GTF_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--gtf_uncompressed\': \'$VIASH_PAR_GTF_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_uncompressed=*) + [ -n "$VIASH_PAR_GTF_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--gtf_uncompressed=*\': \'$VIASH_PAR_GTF_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcript_fasta_uncompressed) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--transcript_fasta_uncompressed\': \'$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_fasta_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_fasta_uncompressed=*) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--transcript_fasta_uncompressed=*\': \'$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gene_bed_uncompressed) + [ -n "$VIASH_PAR_GENE_BED_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--gene_bed_uncompressed\': \'$VIASH_PAR_GENE_BED_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_bed_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gene_bed_uncompressed=*) + [ -n "$VIASH_PAR_GENE_BED_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--gene_bed_uncompressed=*\': \'$VIASH_PAR_GENE_BED_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_index_uncompressed) + [ -n "$VIASH_PAR_STAR_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--star_index_uncompressed\': \'$VIASH_PAR_STAR_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_index_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_index_uncompressed=*) + [ -n "$VIASH_PAR_STAR_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--star_index_uncompressed=*\': \'$VIASH_PAR_STAR_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_index_uncompressed) + [ -n "$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--rsem_index_uncompressed\': \'$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_index_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_index_uncompressed=*) + [ -n "$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--rsem_index_uncompressed=*\': \'$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_index_uncompressed) + [ -n "$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--salmon_index_uncompressed\': \'$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_index_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_index_uncompressed=*) + [ -n "$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--salmon_index_uncompressed=*\': \'$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_index_uncompressed) + [ -n "$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--kallisto_index_uncompressed\': \'$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_index_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_index_uncompressed=*) + [ -n "$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--kallisto_index_uncompressed=*\': \'$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bbsplit_index_uncompressed) + [ -n "$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--bbsplit_index_uncompressed\': \'$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_index_uncompressed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bbsplit_index_uncompressed=*) + [ -n "$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED" ] && ViashError Bad arguments for option \'--bbsplit_index_uncompressed=*\': \'$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chrom_sizes) + [ -n "$VIASH_PAR_CHROM_SIZES" ] && ViashError Bad arguments for option \'--chrom_sizes\': \'$VIASH_PAR_CHROM_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHROM_SIZES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chrom_sizes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chrom_sizes=*) + [ -n "$VIASH_PAR_CHROM_SIZES" ] && ViashError Bad arguments for option \'--chrom_sizes=*\': \'$VIASH_PAR_CHROM_SIZES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHROM_SIZES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fai) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fai. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fai=*) + [ -n "$VIASH_PAR_FAI" ] && ViashError Bad arguments for option \'--fai=*\': \'$VIASH_PAR_FAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FAI=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_FASTA+x} ]; then + ViashError '--fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE+x} ]; then + VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE="31" +fi +if [ -z ${VIASH_PAR_ALIGNER+x} ]; then + VIASH_PAR_ALIGNER="star_salmon" +fi +if [ -z ${VIASH_PAR_PSEUDO_ALIGNER+x} ]; then + VIASH_PAR_PSEUDO_ALIGNER="salmon" +fi +if [ -z ${VIASH_PAR_SKIP_ALIGNMENT+x} ]; then + VIASH_PAR_SKIP_ALIGNMENT="false" +fi +if [ -z ${VIASH_PAR_FASTA_UNCOMPRESSED+x} ]; then + VIASH_PAR_FASTA_UNCOMPRESSED="reference_genome.fasta" +fi +if [ -z ${VIASH_PAR_GTF_UNCOMPRESSED+x} ]; then + VIASH_PAR_GTF_UNCOMPRESSED="gene_annotation.gtf" +fi +if [ -z ${VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED+x} ]; then + VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED="transcriptome.fasta" +fi +if [ -z ${VIASH_PAR_GENE_BED_UNCOMPRESSED+x} ]; then + VIASH_PAR_GENE_BED_UNCOMPRESSED="gene_annotation.bed" +fi +if [ -z ${VIASH_PAR_STAR_INDEX_UNCOMPRESSED+x} ]; then + VIASH_PAR_STAR_INDEX_UNCOMPRESSED="STAR_index" +fi +if [ -z ${VIASH_PAR_RSEM_INDEX_UNCOMPRESSED+x} ]; then + VIASH_PAR_RSEM_INDEX_UNCOMPRESSED="RSEM_index" +fi +if [ -z ${VIASH_PAR_SALMON_INDEX_UNCOMPRESSED+x} ]; then + VIASH_PAR_SALMON_INDEX_UNCOMPRESSED="Salmon_index" +fi +if [ -z ${VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED+x} ]; then + VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED="Kallisto_index" +fi +if [ -z ${VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED+x} ]; then + VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED="BBSplit_index" +fi +if [ -z ${VIASH_PAR_CHROM_SIZES+x} ]; then + VIASH_PAR_CHROM_SIZES="reference_genome.fasta.sizes" +fi +if [ -z ${VIASH_PAR_FAI+x} ]; then + VIASH_PAR_FAI="reference_genome.fasta.fai" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTA" ] && [ ! -e "$VIASH_PAR_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GFF" ] && [ ! -e "$VIASH_PAR_GFF" ]; then + ViashError "Input file '$VIASH_PAR_GFF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ADDITIONAL_FASTA" ] && [ ! -e "$VIASH_PAR_ADDITIONAL_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_ADDITIONAL_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENE_BED" ] && [ ! -e "$VIASH_PAR_GENE_BED" ]; then + ViashError "Input file '$VIASH_PAR_GENE_BED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SPLICESITES" ] && [ ! -e "$VIASH_PAR_SPLICESITES" ]; then + ViashError "Input file '$VIASH_PAR_SPLICESITES' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_BBSPLIT_FASTA_LIST; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_STAR_INDEX" ] && [ ! -e "$VIASH_PAR_STAR_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_STAR_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_INDEX" ] && [ ! -e "$VIASH_PAR_RSEM_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_RSEM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_INDEX" ] && [ ! -e "$VIASH_PAR_SALMON_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_SALMON_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ] && [ ! -e "$VIASH_PAR_KALLISTO_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_KALLISTO_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ] && [ ! -e "$VIASH_PAR_BBSPLIT_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_BBSPLIT_INDEX' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_SKIP_BBSPLIT" ]]; then + if ! [[ "$VIASH_PAR_SKIP_BBSPLIT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_bbsplit' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ]]; then + if ! [[ "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--pseudo_aligner_kmer_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_GENCODE" ]]; then + if ! [[ "$VIASH_PAR_GENCODE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--gencode' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_FILTER_GTF" ]]; then + if ! [[ "$VIASH_PAR_FILTER_GTF" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--filter_gtf' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_ALIGNMENT" ]]; then + if ! [[ "$VIASH_PAR_SKIP_ALIGNMENT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_alignment' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_ALIGNER" ]; then + VIASH_PAR_ALIGNER_CHOICES=("star_salmon;star_rsem;hisat2") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_ALIGNER_CHOICES[*]};" =~ ";$VIASH_PAR_ALIGNER;" ]]; then + ViashError '--aligner' specified value of \'$VIASH_PAR_ALIGNER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER" ]; then + VIASH_PAR_PSEUDO_ALIGNER_CHOICES=("salmon;kallisto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_PSEUDO_ALIGNER_CHOICES[*]};" =~ ";$VIASH_PAR_PSEUDO_ALIGNER;" ]]; then + ViashError '--pseudo_aligner' specified value of \'$VIASH_PAR_PSEUDO_ALIGNER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_FASTA_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTA_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTA_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_GTF_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_GTF_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GTF_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_GENE_BED_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_GENE_BED_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENE_BED_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_STAR_INDEX_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_STAR_INDEX_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STAR_INDEX_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED" ] && [ ! -d "$(dirname "$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED")" +fi +if [ ! -z "$VIASH_PAR_CHROM_SIZES" ] && [ ! -d "$(dirname "$VIASH_PAR_CHROM_SIZES")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_CHROM_SIZES")" +fi +if [ ! -z "$VIASH_PAR_FAI" ] && [ ! -d "$(dirname "$VIASH_PAR_FAI")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FAI")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_GUNZIP="$VIASH_META_RESOURCES_DIR/../../../nextflow/gunzip/main.nf" +VIASH_DEP_CAT_ADDITIONAL_FASTA="$VIASH_META_RESOURCES_DIR/../../../nextflow/cat_additional_fasta/main.nf" +VIASH_DEP_GTF2BED="$VIASH_META_RESOURCES_DIR/../../../nextflow/gtf2bed/main.nf" +VIASH_DEP_PREPROCESS_TRANSCRIPTS_FASTA="$VIASH_META_RESOURCES_DIR/../../../nextflow/preprocess_transcripts_fasta/main.nf" +VIASH_DEP_GTF_FILTER="$VIASH_META_RESOURCES_DIR/../../../nextflow/gtf_filter/main.nf" +VIASH_DEP_GETCHROMSIZES="$VIASH_META_RESOURCES_DIR/../../../nextflow/getchromsizes/main.nf" +VIASH_DEP_GFFREAD="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/main.nf" +VIASH_DEP_RSEM_RSEM_PREPARE_REFERENCE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/main.nf" +VIASH_DEP_UNTAR="$VIASH_TARGET_DIR/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf" +VIASH_DEP_STAR_STAR_GENOME_GENERATE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/main.nf" +VIASH_DEP_BBMAP_BBMAP_BBSPLIT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf" +VIASH_DEP_SALMON_SALMON_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/main.nf" +VIASH_DEP_KALLISTO_KALLISTO_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-prepare_genome-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +workflow run_wf { + + take: + input_ch + + main: + output_ch = input_ch + + // Uncompress fasta + | gunzip.run ( + fromState: [ "input": "fasta" ], + toState: [ "fasta": "output" ], + key: "gunzip_fasta", + args: [ output: "reference_genome.fasta" ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress gtf + | gunzip.run ( + runIf: {id, state -> state.gtf}, + fromState: [ "input": "gtf" ], + toState: [ "gtf": "output" ], + key: "gunzip_gtf", + args: [output: "gene_annotation.gtf"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress gff + | gunzip.run ( + runIf: {id, state -> !state.gtf && state.gff}, + fromState: [ "input": "gff" ], + toState: [ "gff": "output" ], + key: "gunzip_gff", + args: [output: "gene_annotation.gff"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // gff to gtf + | gffread.run ( + runIf: {id, state -> !state.gtf && state.gff}, + fromState: [ + "input": "gff", + "genome": "fasta" + ], + toState: [ "gtf": "outfile" ], + args: [ + outfile: "gene_annotation.gtf", + gtf_output: true, + keep_attrs: true, + keep_exon_attrs: true + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | gtf_filter.run( + runIf: {id, state -> state.gtf && state.filter_gtf}, + fromState: [ + "fasta": "fasta", + "gtf": "gtf" + ], + toState: [ "gtf": "filtered_gtf" ], + args: [filtered_gtf: "gene_annotation.gtf"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress additional fasta + | gunzip.run ( + runIf: {id, state -> state.additional_fasta}, + fromState: [ "input": "additional_fasta" ], + toState: [ "additional_fasta": "output" ], + key: "gunzip_additional_fasta", + args: [output: "additional.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // concatenate additional fasta + | cat_additional_fasta.run ( + runIf: {id, state -> state.additional_fasta}, + fromState: [ + "fasta": "fasta", + "gtf": "gtf", + "additional_fasta": "additional_fasta", + "biotype": "biotype" + ], + toState: [ + "fasta": "fasta_output", + "gtf": "gtf_output" + ], + args: [ + fasta_output: "genome_additional.fasta", + gtf_output: "genome_additional.gtf" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress bed file + | gunzip.run ( + runIf: {id, state -> state.gene_bed}, + fromState: [ "input": "gene_bed" ], + toState: [ "gene_bed": "output" ], + key: "gunzip_gene_bed", + args: [output: "genome_additional.bed"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // gtf to bed + | gtf2bed.run ( + runIf: { id, state -> !state.gene_bed}, + fromState: [ "gtf": "gtf" ], + toState: [ "gene_bed": "bed_output" ], + args: [bed_output: "genome_additional.bed"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress transcript fasta + | gunzip.run ( + runIf: {id, state -> state.transcript_fasta}, + fromState: [ "input": "transcript_fasta" ], + toState: [ "transcript_fasta": "output" ], + key: "transcript_fasta", + args: [output: "transcriptome.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // preprocess transcripts fasta if gtf is in gencode format + | preprocess_transcripts_fasta.run ( + runIf: {id, state -> state.transcript_fasta && state.gencode}, + fromState: [ "transcript_fasta": "transcript_fasta" ], + toState: [ "transcript_fasta": "output" ], + args: [output: "transcriptome.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // make transcript FASTA if not provided + | rsem_prepare_reference.run ( + runIf: {id, state -> !state.transcript_fasta}, + fromState: [ + "reference_fasta_files": "fasta", + "gtf": "gtf" + ], + toState: [ "make_transcript_fasta_output": "output" ], + key: "make_transcript_fasta", + args: [reference_name: "genome"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + | map { id, state -> + def transcript_fasta = (!state.transcript_fasta) ? + state.make_transcript_fasta_output.listFiles().find{it.name == "genome.transcripts.fa"} : + state.transcript_fasta + [ id, state + [transcript_fasta: transcript_fasta] ] + } + + // chromosome size and fai index + | getchromsizes.run ( + fromState: [ "fasta": "fasta" ], + toState: [ + "fai": "fai", + "sizes": "sizes" + ], + key: "chromsizes", + args: [ + fai: "genome_additional.fasta.fai", + sizes: "genome_additional.fasta.sizes" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // untar bbsplit index, if available + | untar.run ( + runIf: {id, state -> state.bbsplit_index}, + fromState: [ "input": "bbsplit_index" ], + toState: [ "bbsplit_index": "output" ], + key: "untar_bbsplit_index", + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | map { id, state -> + // Check if bbsplit_fasta_list is defined + def ref = (state.bbsplit_fasta_list) ? + [state.fasta] + state.bbsplit_fasta_list : + [state.fasta] + [id, state + [bbsplit_ref: ref] ] + } + + // create bbsplit index, if not already available + | bbmap_bbsplit.run ( + runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, + fromState: ["ref": "bbsplit_ref"], + toState: [ "bbsplit_index": "index" ], + args: [ + only_build_index: true, + index: "BBSplit_index" + ], + key: "generate_bbsplit_index" + ) + + // Uncompress STAR index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.star_index}, + fromState: [ "input": "star_index" ], + toState: [ "star_index": "output" ], + key: "untar_star_index", + args: [output: "STAR_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | star_genome_generate.run ( + runIf: {id, state -> !state.star_index && !state.skip_alignment}, + fromState: [ + "genome_fasta_files": "fasta", + "sjdb_gtf_file": "gtf", + "sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ "star_index": "index" ], + key: "generate_star_index", + args: [index: "STAR_index"], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + // Uncompress RSEM index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.rsem_index}, + fromState: [ "input": "rsem_index" ], + toState: [ "rsem_index": "output" ], + key: "untar_rsem_index", + args: [output: "RSEM_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | rsem_prepare_reference.run ( + runIf: {id, state -> !state.rsem_index && state.aligner == 'star_rsem'}, + fromState: [ + "reference_fasta_files": "fasta", + "gtf": "gtf" + ], + toState: [ "rsem_index": "output" ], + key: "generate_rsem_index", + args: [reference_name: "genome"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // TODO: Uncompress HISAT2 index or generate from scratch if required + + // Uncompress Salmon index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.salmon_index}, + fromState: [ "input": "salmon_index" ], + toState: [ "salmon_index": "output" ], + key: "untar_salmon_index", + args: [output: "Salmon_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | salmon_index.run ( + runIf: {id, state -> (state.aligner == 'star_salmon' || state.pseudo_aligner == "salmon") && !state.salmon_index}, + fromState: [ + "genome": "fasta", + "transcripts": "transcript_fasta", + "kmer_len": "pseudo_aligner_kmer_size", + "gencode": "gencode" + ], + toState: [ "salmon_index": "index" ], + key: "generate_salmon_index", + args: [index: "Salmon_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // Uncompress Kallisto index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.kallisto_index}, + fromState: [ "input": "kallisto_index" ], + toState: [ "kallisto_index": "output" ], + key: "untar_kallisto_index", + args: [output: "Kallisto_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | kallisto_index.run( + runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, + fromState: [ + "input": "transcript_fasta", + "kmer_size": "pseudo_aligner_kmer_size" + ], + toState: [ "kallisto_index": "index" ], + key: "generate_kallisto_index", + args: [index: "Kallisto_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "fasta_uncompressed": "fasta", + "gtf_uncompressed": "gtf", + "transcript_fasta_uncompressed": "transcript_fasta", + "gene_bed_uncompressed": "gene_bed", + "star_index_uncompressed": "star_index", + "salmon_index_uncompressed": "salmon_index", + "kallisto_index_uncompressed": "kallisto_index", + "bbsplit_index_uncompressed": "bbsplit_index", + "rsem_index_uncompressed": "rsem_index", + "chrom_sizes": "sizes", + "fai": "fai" + ) + + emit: + output_ch +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTA_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_FASTA_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_FASTA_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_GTF_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_GTF_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPT_FASTA_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENE_BED_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_GENE_BED_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_GENE_BED_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STAR_INDEX_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_STAR_INDEX_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_STAR_INDEX_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_RSEM_INDEX_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_SALMON_INDEX_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_KALLISTO_INDEX_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED" ] && [ ! -e "$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED" ]; then + ViashError "Output file '$VIASH_PAR_BBSPLIT_INDEX_UNCOMPRESSED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_CHROM_SIZES" ] && [ ! -e "$VIASH_PAR_CHROM_SIZES" ]; then + ViashError "Output file '$VIASH_PAR_CHROM_SIZES' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FAI" ] && [ ! -e "$VIASH_PAR_FAI" ]; then + ViashError "Output file '$VIASH_PAR_FAI' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml b/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml new file mode 100644 index 0000000..8f7f8c3 --- /dev/null +++ b/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml @@ -0,0 +1,328 @@ +name: "pseudo_alignment_and_quant" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + alternatives: + - "-i" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, or\ + \ reverse" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Fasta file of the reference transcriptome." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Specifies the pseudo aligner to use - available options are 'salmon'.\ + \ Runs in addition to '--aligner'." + info: null + default: + - "false" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Salmon index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index" + description: "Kallisto index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--lib_type" + description: "Override library type inferred based on strandedness defined in\ + \ meta object" + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length" + description: "For single-end mode only, the estimated average fragment length\ + \ to use for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length_sd" + description: "For single-end mode only, the estimated standard deviation of the\ + \ fragment length for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--pseudo_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_out_dir" + info: null + default: + - "$id.quant" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_results_file" + info: null + default: + - "$id.quant.sf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_quant_results_file" + info: null + default: + - "$id.abundance.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash sub-workflow for pseudo alignment and quantification stage of\ + \ nf-core/rnaseq pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "salmon/salmon_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "kallisto/kallisto_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/pseudo_alignment_and_quant/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/pseudo_alignment_and_quant" + executable: "target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/pseudo_alignment_and_quant/nextflow_labels.config b/target/executable/workflows/pseudo_alignment_and_quant/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/pseudo_alignment_and_quant/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant b/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant new file mode 100755 index 0000000..4b39f84 --- /dev/null +++ b/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant @@ -0,0 +1,946 @@ +#!/usr/bin/env bash + +# pseudo_alignment_and_quant v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="pseudo_alignment_and_quant" +VIASH_META_FUNCTIONALITY_NAME="pseudo_alignment_and_quant" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "pseudo_alignment_and_quant v0.3.0" + echo "" + echo "A viash sub-workflow for pseudo alignment and quantification stage of" + echo "nf-core/rnaseq pipeline." + echo "" + echo "Input:" + echo " --id" + echo " type: string, required parameter" + echo " example: foo" + echo " ID of the sample." + echo "" + echo " -i, --fastq_1" + echo " type: file, required parameter, file must exist" + echo " example: input.fastq.gz" + echo " Path to the sample (or read 1 of paired end sample)." + echo "" + echo " --fastq_2" + echo " type: file, file must exist" + echo " Path to read 2 of the sample." + echo "" + echo " --strandedness" + echo " type: string" + echo " choices: [ forward, reverse, unstranded ]" + echo " Sample strand-specificity. Must be one of unstranded, forward, or" + echo " reverse" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " GTF file" + echo "" + echo " --transcript_fasta" + echo " type: file, file must exist" + echo " Fasta file of the reference transcriptome." + echo "" + echo " --pseudo_aligner" + echo " type: string" + echo " default: false" + echo " choices: [ salmon, kallisto ]" + echo " Specifies the pseudo aligner to use - available options are 'salmon'." + echo " Runs in addition to '--aligner'." + echo "" + echo " --salmon_index" + echo " type: file, file must exist" + echo " Salmon index" + echo "" + echo " --kallisto_index" + echo " type: file, file must exist" + echo " Kallisto index" + echo "" + echo " --lib_type" + echo " type: string" + echo " default:" + echo " Override library type inferred based on strandedness defined in meta" + echo " object" + echo "" + echo " --kallisto_quant_fragment_length" + echo " type: double" + echo " For single-end mode only, the estimated average fragment length to use" + echo " for quantification with Kallisto." + echo "" + echo " --kallisto_quant_fragment_length_sd" + echo " type: double" + echo " For single-end mode only, the estimated standard deviation of the" + echo " fragment length for quantification with Kallisto." + echo "" + echo "Output:" + echo " --pseudo_multiqc" + echo " type: file, output, file must exist" + echo "" + echo " --quant_out_dir" + echo " type: file, output, file must exist" + echo " default: \$id.quant" + echo "" + echo " --salmon_quant_results_file" + echo " type: file, output, file must exist" + echo " default: \$id.quant.sf" + echo "" + echo " --kallisto_quant_results_file" + echo " type: file, output, file must exist" + echo " default: \$id.abundance.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "pseudo_alignment_and_quant v0.3.0" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_1) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_1=*) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1=*\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2=*) + [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2=*\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcript_fasta) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_fasta=*) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta=*\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner=*\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_index) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_index=*) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index=*\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_index) + [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_index=*) + [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index=*\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lib_type) + [ -n "$VIASH_PAR_LIB_TYPE" ] && ViashError Bad arguments for option \'--lib_type\': \'$VIASH_PAR_LIB_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIB_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lib_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lib_type=*) + [ -n "$VIASH_PAR_LIB_TYPE" ] && ViashError Bad arguments for option \'--lib_type=*\': \'$VIASH_PAR_LIB_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIB_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_quant_fragment_length) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_quant_fragment_length. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_quant_fragment_length=*) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length=*\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_quant_fragment_length_sd) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length_sd\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_quant_fragment_length_sd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_quant_fragment_length_sd=*) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length_sd=*\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_multiqc) + [ -n "$VIASH_PAR_PSEUDO_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_multiqc\': \'$VIASH_PAR_PSEUDO_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_multiqc=*) + [ -n "$VIASH_PAR_PSEUDO_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_multiqc=*\': \'$VIASH_PAR_PSEUDO_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_out_dir) + [ -n "$VIASH_PAR_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--quant_out_dir\': \'$VIASH_PAR_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_OUT_DIR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_out_dir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_out_dir=*) + [ -n "$VIASH_PAR_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--quant_out_dir=*\': \'$VIASH_PAR_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_OUT_DIR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_quant_results_file) + [ -n "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--salmon_quant_results_file\': \'$VIASH_PAR_SALMON_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_RESULTS_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_results_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_quant_results_file=*) + [ -n "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--salmon_quant_results_file=*\': \'$VIASH_PAR_SALMON_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_quant_results_file) + [ -n "$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--kallisto_quant_results_file\': \'$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_quant_results_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_quant_results_file=*) + [ -n "$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--kallisto_quant_results_file=*\': \'$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_ID+x} ]; then + ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then + ViashError '--fastq_1' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_PSEUDO_ALIGNER+x} ]; then + VIASH_PAR_PSEUDO_ALIGNER="false" +fi +if [ -z ${VIASH_PAR_LIB_TYPE+x} ]; then + VIASH_PAR_LIB_TYPE="" +fi +if [ -z ${VIASH_PAR_QUANT_OUT_DIR+x} ]; then + VIASH_PAR_QUANT_OUT_DIR="\$id.quant" +fi +if [ -z ${VIASH_PAR_SALMON_QUANT_RESULTS_FILE+x} ]; then + VIASH_PAR_SALMON_QUANT_RESULTS_FILE="\$id.quant.sf" +fi +if [ -z ${VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE+x} ]; then + VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE="\$id.abundance.tsv" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -e "$VIASH_PAR_FASTQ_1" ]; then + ViashError "Input file '$VIASH_PAR_FASTQ_1' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FASTQ_2" ] && [ ! -e "$VIASH_PAR_FASTQ_2" ]; then + ViashError "Input file '$VIASH_PAR_FASTQ_2' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_INDEX" ] && [ ! -e "$VIASH_PAR_SALMON_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_SALMON_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ] && [ ! -e "$VIASH_PAR_KALLISTO_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_KALLISTO_INDEX' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ]]; then + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ]]; then + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length_sd' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_STRANDEDNESS" ]; then + VIASH_PAR_STRANDEDNESS_CHOICES=("forward;reverse;unstranded") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_STRANDEDNESS_CHOICES[*]};" =~ ";$VIASH_PAR_STRANDEDNESS;" ]]; then + ViashError '--strandedness' specified value of \'$VIASH_PAR_STRANDEDNESS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER" ]; then + VIASH_PAR_PSEUDO_ALIGNER_CHOICES=("salmon;kallisto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_PSEUDO_ALIGNER_CHOICES[*]};" =~ ";$VIASH_PAR_PSEUDO_ALIGNER;" ]]; then + ViashError '--pseudo_aligner' specified value of \'$VIASH_PAR_PSEUDO_ALIGNER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_PSEUDO_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_QUANT_OUT_DIR" ] && [ ! -d "$(dirname "$VIASH_PAR_QUANT_OUT_DIR")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUANT_OUT_DIR")" +fi +if [ ! -z "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && [ ! -d "$(dirname "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE")" +fi +if [ ! -z "$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE" ] && [ ! -d "$(dirname "$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_SALMON_SALMON_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf" +VIASH_DEP_KALLISTO_KALLISTO_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-pseudo_alignment_and_quant-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired, input: input ] ] + } + + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + // Count reads from BAM alignments using Salmon + | salmon_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'salmon' }, + fromState: { id, state -> + def unmated_reads = !state.paired ? state.fastq_1 : null + def mates1 = state.paired ? state.fastq_1 : null + def mates2 = state.paired ? state.fastq_2 : null + [ unmated_reads: unmated_reads, + mates1: mates1, + mates2: mates2, + gene_map: state.gtf, + index: state.salmon_index, + lib_type: state.lib_type ] + }, + toState: [ + "quant_out_dir": "output", + "salmon_quant_results_file": "quant_results" + ], + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = (state.pseudo_aligner == 'salmon') ? state + [pseudo_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | kallisto_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, + fromState: { id, state -> + def fr_stranded = state.strandedness == 'forward' + def rf_stranded = state.strandedness == 'reverse' + [ + input: state.input, + index: state.kallisto_index, + fragment_length: state.kallisto_quant_fragment_length, + sd: state.kallisto_quant_fragment_length_sd, + single: !state.paired, + fr_stranded: fr_stranded, + rf_stranded: rf_stranded, + ] + }, + args: [log: "kallisto_quant.log"], + toState: { id, output_state, state -> + def neKeys = [ + "quant_out_dir": output_state["output_dir"], + "kallisto_quant_results_file": output_state["output_dir"] + "/abundance.tsv", + "pseudo_multiqc": output_state["log"] + ] + def new_state = state + newKeys + return new_state + }, + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ "pseudo_multiqc": "quant_results", + "quant_out_dir": "quant_out_dir", + "salmon_quant_results_file": "salmon_quant_results_file", + "kallisto_quant_results_file": "kallisto_quant_results_file" + ] + ) + + emit: + output_ch +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_PSEUDO_MULTIQC" ] && [ ! -e "$VIASH_PAR_PSEUDO_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_OUT_DIR" ] && [ ! -e "$VIASH_PAR_QUANT_OUT_DIR" ]; then + ViashError "Output file '$VIASH_PAR_QUANT_OUT_DIR' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ]; then + ViashError "Output file '$VIASH_PAR_SALMON_QUANT_RESULTS_FILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE" ]; then + ViashError "Output file '$VIASH_PAR_KALLISTO_QUANT_RESULTS_FILE' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/workflows/quality_control/.config.vsh.yaml b/target/executable/workflows/quality_control/.config.vsh.yaml new file mode 100644 index 0000000..4069648 --- /dev/null +++ b/target/executable/workflows/quality_control/.config.vsh.yaml @@ -0,0 +1,1615 @@ +name: "quality_control" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample" + info: null + example: + - "test" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse" + info: null + default: + - "unstranded" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "Paired-end reads?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam" + description: "Input genome BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + description: "Genome BAM index file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed" + description: "Path to BED file containing gene intervals. This will be created\ + \ from the GTF file if not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + description: "Define the attribute type used to group features in the GTF file\ + \ when running Salmon." + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + description: "By default, the pipeline uses the gene_name field to obtain additional\ + \ gene identifiers from the input GTF file when running Salmon." + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_out_dir" + description: "Directory containing Salmon quantification results." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_results_file" + description: "Salmon quantification file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_out_dir" + description: "Directory containing quantification results for pseudo alignment." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_salmon_quant_results_file" + description: "Quantification file from Salmon for pseudo alignment." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_kallisto_quant_results_file" + description: "Quantification file from Kallisto for pseudo alignment." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--aligner" + description: "Method used for alognment and qqunatification." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Method used for pseudo alignment and quantification." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_gene" + description: "Expression counts on gene level" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_transcripts" + description: "Expression counts on transcript level" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_qc" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_biotype_qc" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_align" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_pseudo_align" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_dupradar" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_qualimap" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_rseqc" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_multiqc" + info: null + direction: "input" + - type: "boolean" + name: "--skip_preseq" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_preseq_args" + info: null + default: + - "-verbose -bam -seed 1" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_group_type" + description: "The attribute type used to group feature types in the GTF file when\ + \ generating the biotype plot with featureCounts." + info: null + default: + - "gene_biotype" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_feature_type" + description: "By default, the pipeline assigns reads based on the 'exon' attribute\ + \ within the GTF file." + info: null + default: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--gencode" + description: "Specify if the GTF annotation is in GENCODE format." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--biotypes_header" + info: null + default: + - "src/assets/multiqc/biotypes_header.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--biotype" + description: "Biotype value to use while appending entries to GTF file when additional\ + \ fasta file is provided." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--rseqc_modules" + description: "Specify the RSeQC modules to run_wf" + info: null + default: + - "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + required: false + choices: + - "bam_stat" + - "inner_distance" + - "infer_experiment" + - "junction_annotation" + - "junction_saturation" + - "read_distribution" + - "read_duplication" + - "tin" + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--sample_size" + description: "Numer of reads sampled from SAM/BAM file to infer experiment and\ + \ calculate inner distance, default = 200000." + info: null + default: + - 200000 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--lower_bound_size" + description: "Lower bound of inner distance (bp). This option is used for ploting\ + \ histograme, default = -250." + info: null + default: + - -250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--upper_bound_size" + description: "Upper bound of inner distance (bp). This option is used for ploting\ + \ histograme, default = 250." + info: null + default: + - 250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--step_size" + description: "Step size (bp) of histograme of inner distance. This option is used\ + \ for plotting histogram, default = 5." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default = 30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_intron" + description: "Minimum intron length (bp) to call a junction, default = 50." + info: null + default: + - 50 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_splice_read" + description: "Minimum number of supporting reads to call a junction, default =\ + \ 1." + info: null + default: + - 1 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_lower_bound" + description: "Read sampling for junction saturation starts from this percentile,\ + \ must be an integer between 0 and 100, default = 5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_upper_bound" + description: "Read sampling for junction saturation ends at this percentile, must\ + \ be an integer between 0 and 100, default = 5." + info: null + default: + - 100 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_step" + description: "Read sampling for junction saturation frequency in %. Smaller value\ + \ means more sampling times. Must be an integer between 0 and 100, default =\ + \ 5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_count_upper_limit" + description: "Upper limit of reads' occurence to determine read duplication. Only\ + \ used for plotting, default = 500 (times)." + info: null + default: + - 500 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--minimum_coverage" + description: "Minimum number of reads mapped to a transcript to determin tin,\ + \ default = 10." + info: null + default: + - 10 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--tin_sample_size" + description: "Number of equal-spaced nucleotide positions picked from mRNA. Note,\ + \ if this number is larger than the length of mRNA (L), it will be halved until\ + \ it's smaller than L (default = 100)" + info: null + default: + - 100 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--subtract_background" + description: "Set flag to subtract background noise (estimated from intronic reads)\ + \ to determine tin. Only use this option if there are substantial intronic reads." + info: null + direction: "input" + - type: "integer" + name: "--pr_bases" + description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\ + \ bias for qualimap (default = 100)." + info: null + default: + - 100 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--tr_bias" + description: "Number of top highly expressed transcripts to compute 5'-3' bias\ + \ for qualimap (default = 1000)." + info: null + default: + - 1000 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--algorithm" + description: "Counting algorithm for qualimap (uniquely-mapped-reads (default)\ + \ or proportional)." + info: null + default: + - "uniquely-mapped-reads" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sequencing_protocol" + description: "Sequencing library protocol for qualimap (strand-specific-forward,\ + \ strand-specific-reverse or non-strand-specific (default))." + info: null + default: + - "non-strand-specific" + required: false + choices: + - "non-strand-specific" + - "strand-specific-reverse" + - "strand-specific-forward" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sorted" + description: "Setting this flag indicates that the input file is already sorted\ + \ by name. If flag is not set, additional sorting by name will be performed\ + \ for qualimap. Only requiredfor paired-end analysis." + info: null + direction: "input" + - type: "string" + name: "--java_memory_size" + description: "maximum Java heap memory size for qualimap, default = 4G." + info: null + default: + - "4G" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_deseq2_qc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--deseq2_vst" + description: "Use vst transformation instead of rlog with DESeq2" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_custom_config" + description: "Custom multiqc configuration file\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--multiqc_title" + description: "Custom multiqc title\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_methods_description" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--passed_trimmed_reads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--num_trimmed_reads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--passed_mapping" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--percent_mapped" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_1" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_2" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_1" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_2" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_1" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_2" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sortmerna_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--markduplicates_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--preseq_output" + info: null + default: + - "$id.lc_extrap.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bamstat_output" + description: "Path to output file (txt) of mapping quality statistics" + info: null + default: + - "$id.mapping_quality.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--strandedness_output" + description: "Path to output report (txt) of inferred strandedness" + info: null + default: + - "$id.strandedness.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_stats" + description: "output file (txt) with summary statistics of inner distances of\ + \ paired reads" + info: null + default: + - "$id.inner_distance.stats" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_dist" + description: "output file (txt) with inner distances of all paired reads" + info: null + default: + - "$id.inner_distance.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_freq" + description: "output file (txt) with frequencies of inner distances of all paired\ + \ reads" + info: null + default: + - "$id.inner_distance_freq.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot" + description: "output file (pdf) with histogram plot of of inner distances of all\ + \ paired reads" + info: null + default: + - "$id.inner_distance_plot.pdf" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot_r" + description: "output file (R) with script of histogram plot of of inner distances\ + \ of all paired reads" + info: null + default: + - "$id.inner_distance_plot.r" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_log" + description: "output log of junction annotation script" + info: null + default: + - "$id.junction_annotation.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_plot_r" + description: "R script to generate splice_junction and splice_events plot" + info: null + default: + - "$id.junction_annotation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_bed" + description: "junction annotation file (bed format)" + info: null + default: + - "$id.junction_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_interact" + description: "interact file (bed format) of junctions. Can be uploaded to UCSC\ + \ genome browser or converted to bigInteract (using bedToBigBed program) for\ + \ visualization." + info: null + default: + - "$id.junction_annotation.Interact.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_sheet" + description: "junction annotation file (xls format)" + info: null + default: + - "$id.junction_annotation.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_events_plot" + description: "plot of splice events (pdf)" + info: null + default: + - "$id.splice_events.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_junctions_plot" + description: "plot of junctions (pdf)" + info: null + default: + - "$id.splice_junctions_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot_r" + description: "r script to generate junction_saturation_plot plot" + info: null + default: + - "$id.junction_saturation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot" + description: "plot of junction saturation (pdf" + info: null + default: + - "$id.junction_saturation_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_distribution_output" + description: "output file (txt) of read distribution analysis." + info: null + default: + - "$id.read_distribution.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot_r" + description: "R script for generating duplication rate plot" + info: null + default: + - "$id.duplication_rate_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot" + description: "duplication rate plot (pdf)" + info: null + default: + - "$id.duplication_rate_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_mapping" + description: "Summary of mapping-based read duplication" + info: null + default: + - "$id.duplication_rate_mapping.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_sequence" + description: "Summary of sequencing-based read duplication" + info: null + default: + - "$id.duplication_rate_sequencing.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_summary" + description: "summary statistics (txt) of calculated TIN metrics" + info: null + default: + - "$id.tin_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_metrics" + description: "file with TIN metrics (xls)" + info: null + default: + - "$id.tin.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dupmatrix" + description: "path to output file (txt) of duplicate tag counts" + info: null + default: + - "$id.dup_matrix.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dup_intercept_mqc" + description: "path to output file (txt) of multiqc intercept value DupRadar" + info: null + default: + - "$id.dup_intercept_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_boxplot" + description: "path to output file (pdf) of distribution of expression box plot" + info: null + default: + - "$id.duprate_exp_boxplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_densplot" + description: "path to output file (pdf) of 2D density scatter plot of duplicate\ + \ tag counts" + info: null + default: + - "$id.duprate_exp_densityplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_denscurve_mqc" + description: "path to output file (pdf) of density curve of gene duplication multiqc" + info: null + default: + - "$id.duprate_exp_density_curve_mqc.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_expression_histogram" + description: "path to output file (pdf) of distribution of RPK values per gene\ + \ histogram" + info: null + default: + - "$id.expression_hist.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_intercept_slope" + info: null + default: + - "$id.intercept_slope.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." + info: null + example: + - "$id.rnaseq_qc_results.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_counts" + description: "Output file for computed counts." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." + info: null + example: + - "$id.report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output" + info: null + default: + - "deseq2" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output_pseudo" + info: null + default: + - "deseq2_pseudo" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_report" + info: null + default: + - "multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_data" + info: null + default: + - "multiqc_data" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_plots" + info: null + default: + - "multiqc_plots" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts" + info: null + default: + - "$id.featureCounts.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_summary" + info: null + default: + - "$id.featureCounts.txt.summary" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_multiqc" + info: null + default: + - "$id.featureCounts_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + default: + - "$id.featureCounts_rrna_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_gene" + info: null + default: + - "salmon.merged.gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + default: + - "salmon.merged.gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + default: + - "salmon.merged.gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + default: + - "salmon.merged.gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + default: + - "salmon.merged.transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + default: + - "salmon.merged.transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_merged_summarizedexperiment" + info: null + default: + - "salmon_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_gene" + info: null + default: + - "pseudo_gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene" + info: null + default: + - "pseudo_gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_length_scaled" + info: null + default: + - "pseudo_gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_scaled" + info: null + default: + - "pseudo_gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_transcript" + info: null + default: + - "pseudo_transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_transcript" + info: null + default: + - "pseudo_transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_merged_summarizedexperiment" + info: null + default: + - "pseudo_quant_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A subworkflow for the final quality control stage of the nf-core/rnaseq\ + \ pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "rseqc/rseqc_bamstat" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rseqc/rseqc_inferexperiment" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rseqc/rseqc_inner_distance" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rseqc/rseqc_junctionannotation" + repository: + type: "local" +- name: "rseqc/rseqc_junctionsaturation" + repository: + type: "local" +- name: "rseqc/rseqc_readdistribution" + repository: + type: "local" +- name: "rseqc/rseqc_readduplication" + repository: + type: "local" +- name: "rseqc/rseqc_tin" + repository: + type: "local" +- name: "dupradar" + repository: + type: "local" +- name: "qualimap/qualimap_rnaseq" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "preseq_lcextrap" + repository: + type: "local" +- name: "featurecounts" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "multiqc_custom_biotype" + repository: + type: "local" +- name: "deseq2_qc" + repository: + type: "local" +- name: "prepare_multiqc_input" + repository: + type: "local" +- name: "multiqc" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rsem_merge_counts" + repository: + type: "local" +- name: "workflows/merge_quant_results" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/quality_control/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/quality_control" + executable: "target/executable/workflows/quality_control/quality_control" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance" + - "target/nextflow/rseqc/rseqc_junctionannotation" + - "target/nextflow/rseqc/rseqc_junctionsaturation" + - "target/nextflow/rseqc/rseqc_readdistribution" + - "target/nextflow/rseqc/rseqc_readduplication" + - "target/nextflow/rseqc/rseqc_tin" + - "target/nextflow/dupradar" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq" + - "target/nextflow/preseq_lcextrap" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts" + - "target/nextflow/multiqc_custom_biotype" + - "target/nextflow/deseq2_qc" + - "target/nextflow/prepare_multiqc_input" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc" + - "target/nextflow/rsem_merge_counts" + - "target/nextflow/workflows/merge_quant_results" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/quality_control/nextflow_labels.config b/target/executable/workflows/quality_control/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/quality_control/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/quality_control/quality_control b/target/executable/workflows/quality_control/quality_control new file mode 100755 index 0000000..c42a7b2 --- /dev/null +++ b/target/executable/workflows/quality_control/quality_control @@ -0,0 +1,4345 @@ +#!/usr/bin/env bash + +# quality_control v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="quality_control" +VIASH_META_FUNCTIONALITY_NAME="quality_control" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "quality_control v0.3.0" + echo "" + echo "A subworkflow for the final quality control stage of the nf-core/rnaseq" + echo "pipeline." + echo "" + echo "Input:" + echo " --id" + echo " type: string, required parameter" + echo " example: test" + echo " ID of the sample" + echo "" + echo " --strandedness" + echo " type: string" + echo " default: unstranded" + echo " Sample strand-specificity. Must be one of unstranded, forward, reverse" + echo "" + echo " --paired" + echo " type: boolean" + echo " Paired-end reads?" + echo "" + echo " --genome_bam" + echo " type: file, file must exist" + echo " Input genome BAM file" + echo "" + echo " --genome_bam_index" + echo " type: file, file must exist" + echo " Genome BAM index file" + echo "" + echo " --gene_bed" + echo " type: file, file must exist" + echo " Path to BED file containing gene intervals. This will be created from" + echo " the GTF file if not specified." + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " GTF file" + echo "" + echo " --gtf_group_features" + echo " type: string" + echo " default: gene_id" + echo " Define the attribute type used to group features in the GTF file when" + echo " running Salmon." + echo "" + echo " --gtf_extra_attributes" + echo " type: string" + echo " default: gene_name" + echo " By default, the pipeline uses the gene_name field to obtain additional" + echo " gene identifiers from the input GTF file when running Salmon." + echo "" + echo " --quant_out_dir" + echo " type: file, file must exist" + echo " Directory containing Salmon quantification results." + echo "" + echo " --quant_results_file" + echo " type: file, file must exist" + echo " Salmon quantification file." + echo "" + echo " --pseudo_quant_out_dir" + echo " type: file, file must exist" + echo " Directory containing quantification results for pseudo alignment." + echo "" + echo " --pseudo_salmon_quant_results_file" + echo " type: file, file must exist" + echo " Quantification file from Salmon for pseudo alignment." + echo "" + echo " --pseudo_kallisto_quant_results_file" + echo " type: file, file must exist" + echo " Quantification file from Kallisto for pseudo alignment." + echo "" + echo " --aligner" + echo " type: string" + echo " Method used for alognment and qqunatification." + echo "" + echo " --pseudo_aligner" + echo " type: string" + echo " Method used for pseudo alignment and quantification." + echo "" + echo " --rsem_counts_gene" + echo " type: file, file must exist" + echo " Expression counts on gene level" + echo "" + echo " --rsem_counts_transcripts" + echo " type: file, file must exist" + echo " Expression counts on transcript level" + echo "" + echo " --skip_qc" + echo " type: boolean_true" + echo "" + echo " --skip_biotype_qc" + echo " type: boolean_true" + echo "" + echo " --skip_align" + echo " type: boolean_true" + echo "" + echo " --skip_pseudo_align" + echo " type: boolean_true" + echo "" + echo " --skip_dupradar" + echo " type: boolean_true" + echo "" + echo " --skip_qualimap" + echo " type: boolean_true" + echo "" + echo " --skip_rseqc" + echo " type: boolean_true" + echo "" + echo " --skip_multiqc" + echo " type: boolean_true" + echo "" + echo " --skip_preseq" + echo " type: boolean" + echo " default: false" + echo "" + echo " --extra_preseq_args" + echo " type: string" + echo " default: -verbose -bam -seed 1" + echo "" + echo " --featurecounts_group_type" + echo " type: string" + echo " default: gene_biotype" + echo " The attribute type used to group feature types in the GTF file when" + echo " generating the biotype plot with featureCounts." + echo "" + echo " --featurecounts_feature_type" + echo " type: string" + echo " default: exon" + echo " By default, the pipeline assigns reads based on the 'exon' attribute" + echo " within the GTF file." + echo "" + echo " --gencode" + echo " type: boolean" + echo " Specify if the GTF annotation is in GENCODE format." + echo "" + echo " --biotypes_header" + echo " type: file, file must exist" + echo " default: src/assets/multiqc/biotypes_header.txt" + echo "" + echo " --biotype" + echo " type: string" + echo " Biotype value to use while appending entries to GTF file when additional" + echo " fasta file is provided." + echo "" + echo " --rseqc_modules" + echo " type: string, multiple values allowed" + echo " default:" + echo "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + echo " choices: [ bam_stat, inner_distance, infer_experiment," + echo "junction_annotation, junction_saturation, read_distribution, read_duplication," + echo "tin ]" + echo " Specify the RSeQC modules to run_wf" + echo "" + echo " --sample_size" + echo " type: integer" + echo " default: 200000" + echo " min: 1" + echo " Numer of reads sampled from SAM/BAM file to infer experiment and" + echo " calculate inner distance, default = 200000." + echo "" + echo " --lower_bound_size" + echo " type: integer" + echo " default: -250" + echo " Lower bound of inner distance (bp). This option is used for ploting" + echo " histograme, default = -250." + echo "" + echo " --upper_bound_size" + echo " type: integer" + echo " default: 250" + echo " Upper bound of inner distance (bp). This option is used for ploting" + echo " histograme, default = 250." + echo "" + echo " --step_size" + echo " type: integer" + echo " default: 5" + echo " Step size (bp) of histograme of inner distance. This option is used for" + echo " plotting histogram, default = 5." + echo "" + echo " --map_qual" + echo " type: integer" + echo " default: 30" + echo " min: 0" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default = 30." + echo "" + echo " --min_intron" + echo " type: integer" + echo " default: 50" + echo " min: 1" + echo " Minimum intron length (bp) to call a junction, default = 50." + echo "" + echo " --min_splice_read" + echo " type: integer" + echo " default: 1" + echo " min: 1" + echo " Minimum number of supporting reads to call a junction, default = 1." + echo "" + echo " --sampling_percentile_lower_bound" + echo " type: integer" + echo " default: 5" + echo " min: 0" + echo " max: 100" + echo " Read sampling for junction saturation starts from this percentile, must" + echo " be an integer between 0 and 100, default = 5." + echo "" + echo " --sampling_percentile_upper_bound" + echo " type: integer" + echo " default: 100" + echo " min: 0" + echo " max: 100" + echo " Read sampling for junction saturation ends at this percentile, must be" + echo " an integer between 0 and 100, default = 5." + echo "" + echo " --sampling_percentile_step" + echo " type: integer" + echo " default: 5" + echo " min: 0" + echo " max: 100" + echo " Read sampling for junction saturation frequency in %. Smaller value" + echo " means more sampling times. Must be an integer between 0 and 100, default" + echo " = 5." + echo "" + echo " --read_count_upper_limit" + echo " type: integer" + echo " default: 500" + echo " min: 1" + echo " Upper limit of reads' occurence to determine read duplication. Only used" + echo " for plotting, default = 500 (times)." + echo "" + echo " --minimum_coverage" + echo " type: integer" + echo " default: 10" + echo " min: 1" + echo " Minimum number of reads mapped to a transcript to determin tin, default" + echo " = 10." + echo "" + echo " --tin_sample_size" + echo " type: integer" + echo " default: 100" + echo " min: 1" + echo " Number of equal-spaced nucleotide positions picked from mRNA. Note, if" + echo " this number is larger than the length of mRNA (L), it will be halved" + echo " until it's smaller than L (default = 100)" + echo "" + echo " --subtract_background" + echo " type: boolean_true" + echo " Set flag to subtract background noise (estimated from intronic reads) to" + echo " determine tin. Only use this option if there are substantial intronic" + echo " reads." + echo "" + echo " --pr_bases" + echo " type: integer" + echo " default: 100" + echo " min: 1" + echo " Number of upstream/downstream nucleotide bases to compute 5'-3' bias for" + echo " qualimap (default = 100)." + echo "" + echo " --tr_bias" + echo " type: integer" + echo " default: 1000" + echo " min: 1" + echo " Number of top highly expressed transcripts to compute 5'-3' bias for" + echo " qualimap (default = 1000)." + echo "" + echo " --algorithm" + echo " type: string" + echo " default: uniquely-mapped-reads" + echo " Counting algorithm for qualimap (uniquely-mapped-reads (default) or" + echo " proportional)." + echo "" + echo " --sequencing_protocol" + echo " type: string" + echo " default: non-strand-specific" + echo " choices: [ non-strand-specific, strand-specific-reverse," + echo "strand-specific-forward ]" + echo " Sequencing library protocol for qualimap (strand-specific-forward," + echo " strand-specific-reverse or non-strand-specific (default))." + echo "" + echo " --sorted" + echo " type: boolean_true" + echo " Setting this flag indicates that the input file is already sorted by" + echo " name. If flag is not set, additional sorting by name will be performed" + echo " for qualimap. Only requiredfor paired-end analysis." + echo "" + echo " --java_memory_size" + echo " type: string" + echo " default: 4G" + echo " maximum Java heap memory size for qualimap, default = 4G." + echo "" + echo " --skip_deseq2_qc" + echo " type: boolean" + echo "" + echo " --deseq2_vst" + echo " type: boolean" + echo " Use vst transformation instead of rlog with DESeq2" + echo "" + echo " --multiqc_custom_config" + echo " type: file, file must exist" + echo " Custom multiqc configuration file" + echo "" + echo " --multiqc_title" + echo " type: string" + echo " Custom multiqc title" + echo "" + echo " --multiqc_methods_description" + echo " type: file, file must exist" + echo "" + echo " --passed_trimmed_reads" + echo " type: boolean" + echo "" + echo " --num_trimmed_reads" + echo " type: double" + echo "" + echo " --passed_mapping" + echo " type: boolean" + echo "" + echo " --percent_mapped" + echo " type: double" + echo "" + echo " --fastqc_zip_1" + echo " type: file" + echo "" + echo " --fastqc_zip_2" + echo " type: file" + echo "" + echo " --trim_zip_1" + echo " type: file" + echo "" + echo " --trim_zip_2" + echo " type: file" + echo "" + echo " --trim_log_1" + echo " type: file" + echo "" + echo " --trim_log_2" + echo " type: file" + echo "" + echo " --sortmerna_multiqc" + echo " type: file" + echo "" + echo " --star_multiqc" + echo " type: file" + echo "" + echo " --rsem_multiqc" + echo " type: file, file must exist" + echo "" + echo " --genome_bam_stats" + echo " type: file" + echo "" + echo " --genome_bam_flagstat" + echo " type: file" + echo "" + echo " --genome_bam_idxstats" + echo " type: file" + echo "" + echo " --markduplicates_multiqc" + echo " type: file" + echo "" + echo " --pseudo_multiqc" + echo " type: file" + echo "" + echo "Output:" + echo " --preseq_output" + echo " type: file, output, file must exist" + echo " default: \$id.lc_extrap.txt" + echo "" + echo " --bamstat_output" + echo " type: file, output, file must exist" + echo " default: \$id.mapping_quality.txt" + echo " Path to output file (txt) of mapping quality statistics" + echo "" + echo " --strandedness_output" + echo " type: file, output, file must exist" + echo " default: \$id.strandedness.txt" + echo " Path to output report (txt) of inferred strandedness" + echo "" + echo " --inner_dist_output_stats" + echo " type: file, output" + echo " default: \$id.inner_distance.stats" + echo " output file (txt) with summary statistics of inner distances of paired" + echo " reads" + echo "" + echo " --inner_dist_output_dist" + echo " type: file, output" + echo " default: \$id.inner_distance.txt" + echo " output file (txt) with inner distances of all paired reads" + echo "" + echo " --inner_dist_output_freq" + echo " type: file, output" + echo " default: \$id.inner_distance_freq.txt" + echo " output file (txt) with frequencies of inner distances of all paired" + echo " reads" + echo "" + echo " --inner_dist_output_plot" + echo " type: file, output" + echo " default: \$id.inner_distance_plot.pdf" + echo " output file (pdf) with histogram plot of of inner distances of all" + echo " paired reads" + echo "" + echo " --inner_dist_output_plot_r" + echo " type: file, output" + echo " default: \$id.inner_distance_plot.r" + echo " output file (R) with script of histogram plot of of inner distances of" + echo " all paired reads" + echo "" + echo " --junction_annotation_output_log" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.log" + echo " output log of junction annotation script" + echo "" + echo " --junction_annotation_output_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation_plot.r" + echo " R script to generate splice_junction and splice_events plot" + echo "" + echo " --junction_annotation_output_junction_bed" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.bed" + echo " junction annotation file (bed format)" + echo "" + echo " --junction_annotation_output_junction_interact" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.Interact.bed" + echo " interact file (bed format) of junctions. Can be uploaded to UCSC genome" + echo " browser or converted to bigInteract (using bedToBigBed program) for" + echo " visualization." + echo "" + echo " --junction_annotation_output_junction_sheet" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.xls" + echo " junction annotation file (xls format)" + echo "" + echo " --junction_annotation_output_splice_events_plot" + echo " type: file, output, file must exist" + echo " default: \$id.splice_events.pdf" + echo " plot of splice events (pdf)" + echo "" + echo " --junction_annotation_output_splice_junctions_plot" + echo " type: file, output, file must exist" + echo " default: \$id.splice_junctions_plot.pdf" + echo " plot of junctions (pdf)" + echo "" + echo " --junction_saturation_output_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.junction_saturation_plot.r" + echo " r script to generate junction_saturation_plot plot" + echo "" + echo " --junction_saturation_output_plot" + echo " type: file, output, file must exist" + echo " default: \$id.junction_saturation_plot.pdf" + echo " plot of junction saturation (pdf" + echo "" + echo " --read_distribution_output" + echo " type: file, output, file must exist" + echo " default: \$id.read_distribution.txt" + echo " output file (txt) of read distribution analysis." + echo "" + echo " --read_duplication_output_duplication_rate_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_plot.r" + echo " R script for generating duplication rate plot" + echo "" + echo " --read_duplication_output_duplication_rate_plot" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_plot.pdf" + echo " duplication rate plot (pdf)" + echo "" + echo " --read_duplication_output_duplication_rate_mapping" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_mapping.xls" + echo " Summary of mapping-based read duplication" + echo "" + echo " --read_duplication_output_duplication_rate_sequence" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_sequencing.xls" + echo " Summary of sequencing-based read duplication" + echo "" + echo " --tin_output_summary" + echo " type: file, output, file must exist" + echo " default: \$id.tin_summary.txt" + echo " summary statistics (txt) of calculated TIN metrics" + echo "" + echo " --tin_output_metrics" + echo " type: file, output, file must exist" + echo " default: \$id.tin.xls" + echo " file with TIN metrics (xls)" + echo "" + echo " --dupradar_output_dupmatrix" + echo " type: file, output, file must exist" + echo " default: \$id.dup_matrix.txt" + echo " path to output file (txt) of duplicate tag counts" + echo "" + echo " --dupradar_output_dup_intercept_mqc" + echo " type: file, output, file must exist" + echo " default: \$id.dup_intercept_mqc.txt" + echo " path to output file (txt) of multiqc intercept value DupRadar" + echo "" + echo " --dupradar_output_duprate_exp_boxplot" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_boxplot.pdf" + echo " path to output file (pdf) of distribution of expression box plot" + echo "" + echo " --dupradar_output_duprate_exp_densplot" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_densityplot.pdf" + echo " path to output file (pdf) of 2D density scatter plot of duplicate tag" + echo " counts" + echo "" + echo " --dupradar_output_duprate_exp_denscurve_mqc" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_density_curve_mqc.pdf" + echo " path to output file (pdf) of density curve of gene duplication multiqc" + echo "" + echo " --dupradar_output_expression_histogram" + echo " type: file, output, file must exist" + echo " default: \$id.expression_hist.pdf" + echo " path to output file (pdf) of distribution of RPK values per gene" + echo " histogram" + echo "" + echo " --dupradar_output_intercept_slope" + echo " type: file, output, file must exist" + echo " default: \$id.intercept_slope.txt" + echo "" + echo " --qualimap_qc_report" + echo " type: file, output, file must exist" + echo " example: \$id.rnaseq_qc_results.txt" + echo " Text file containing the RNAseq QC results." + echo "" + echo " --qualimap_counts" + echo " type: file, output, file must exist" + echo " Output file for computed counts." + echo "" + echo " --qualimap_report" + echo " type: file, output, file must exist" + echo " example: \$id.report.html" + echo " Report output file. Supported formats are PDF or HTML." + echo "" + echo " --deseq2_output" + echo " type: file, output, file must exist" + echo " default: deseq2" + echo "" + echo " --deseq2_output_pseudo" + echo " type: file, output, file must exist" + echo " default: deseq2_pseudo" + echo "" + echo " --multiqc_report" + echo " type: file, output, file must exist" + echo " default: multiqc_report.html" + echo "" + echo " --multiqc_data" + echo " type: file, output, file must exist" + echo " default: multiqc_data" + echo "" + echo " --multiqc_plots" + echo " type: file, output, file must exist" + echo " default: multiqc_plots" + echo "" + echo " --featurecounts" + echo " type: file, output" + echo " default: \$id.featureCounts.txt" + echo "" + echo " --featurecounts_summary" + echo " type: file, output" + echo " default: \$id.featureCounts.txt.summary" + echo "" + echo " --featurecounts_multiqc" + echo " type: file, output" + echo " default: \$id.featureCounts_mqc.tsv" + echo "" + echo " --featurecounts_rrna_multiqc" + echo " type: file, output" + echo " default: \$id.featureCounts_rrna_mqc.tsv" + echo "" + echo " --tpm_gene" + echo " type: file, output, file must exist" + echo " default: salmon.merged.gene_tpm.tsv" + echo "" + echo " --counts_gene" + echo " type: file, output, file must exist" + echo " default: salmon.merged.gene_counts.tsv" + echo "" + echo " --counts_gene_length_scaled" + echo " type: file, output, file must exist" + echo " default: salmon.merged.gene_counts_length_scaled.tsv" + echo "" + echo " --counts_gene_scaled" + echo " type: file, output, file must exist" + echo " default: salmon.merged.gene_counts_scaled.tsv" + echo "" + echo " --tpm_transcript" + echo " type: file, output, file must exist" + echo " default: salmon.merged.transcript_tpm.tsv" + echo "" + echo " --counts_transcript" + echo " type: file, output, file must exist" + echo " default: salmon.merged.transcript_counts.tsv" + echo "" + echo " --quant_merged_summarizedexperiment" + echo " type: file, output, file must exist" + echo " default: salmon_merged_summarizedexperiment" + echo "" + echo " --pseudo_tpm_gene" + echo " type: file, output, file must exist" + echo " default: pseudo_gene_tpm.tsv" + echo "" + echo " --pseudo_counts_gene" + echo " type: file, output, file must exist" + echo " default: pseudo_gene_counts.tsv" + echo "" + echo " --pseudo_counts_gene_length_scaled" + echo " type: file, output, file must exist" + echo " default: pseudo_gene_counts_length_scaled.tsv" + echo "" + echo " --pseudo_counts_gene_scaled" + echo " type: file, output, file must exist" + echo " default: pseudo_gene_counts_scaled.tsv" + echo "" + echo " --pseudo_tpm_transcript" + echo " type: file, output, file must exist" + echo " default: pseudo_transcript_tpm.tsv" + echo "" + echo " --pseudo_counts_transcript" + echo " type: file, output, file must exist" + echo " default: pseudo_transcript_counts.tsv" + echo "" + echo " --pseudo_quant_merged_summarizedexperiment" + echo " type: file, output, file must exist" + echo " default: pseudo_quant_merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "quality_control v0.3.0" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --paired) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --paired=*) + [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam) + [ -n "$VIASH_PAR_GENOME_BAM" ] && ViashError Bad arguments for option \'--genome_bam\': \'$VIASH_PAR_GENOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam=*) + [ -n "$VIASH_PAR_GENOME_BAM" ] && ViashError Bad arguments for option \'--genome_bam=*\': \'$VIASH_PAR_GENOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_index) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_index=*) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index=*\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gene_bed) + [ -n "$VIASH_PAR_GENE_BED" ] && ViashError Bad arguments for option \'--gene_bed\': \'$VIASH_PAR_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_bed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gene_bed=*) + [ -n "$VIASH_PAR_GENE_BED" ] && ViashError Bad arguments for option \'--gene_bed=*\': \'$VIASH_PAR_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_group_features) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_group_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_group_features=*) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features=*\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_extra_attributes) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_extra_attributes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_extra_attributes=*) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes=*\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_out_dir) + [ -n "$VIASH_PAR_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--quant_out_dir\': \'$VIASH_PAR_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_OUT_DIR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_out_dir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_out_dir=*) + [ -n "$VIASH_PAR_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--quant_out_dir=*\': \'$VIASH_PAR_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_OUT_DIR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_results_file) + [ -n "$VIASH_PAR_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--quant_results_file\': \'$VIASH_PAR_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_RESULTS_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_results_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_results_file=*) + [ -n "$VIASH_PAR_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--quant_results_file=*\': \'$VIASH_PAR_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_quant_out_dir) + [ -n "$VIASH_PAR_PSEUDO_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--pseudo_quant_out_dir\': \'$VIASH_PAR_PSEUDO_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_OUT_DIR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_quant_out_dir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_quant_out_dir=*) + [ -n "$VIASH_PAR_PSEUDO_QUANT_OUT_DIR" ] && ViashError Bad arguments for option \'--pseudo_quant_out_dir=*\': \'$VIASH_PAR_PSEUDO_QUANT_OUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_OUT_DIR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_salmon_quant_results_file) + [ -n "$VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--pseudo_salmon_quant_results_file\': \'$VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_salmon_quant_results_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_salmon_quant_results_file=*) + [ -n "$VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--pseudo_salmon_quant_results_file=*\': \'$VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_kallisto_quant_results_file) + [ -n "$VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--pseudo_kallisto_quant_results_file\': \'$VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_kallisto_quant_results_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_kallisto_quant_results_file=*) + [ -n "$VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--pseudo_kallisto_quant_results_file=*\': \'$VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --aligner) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --aligner=*) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner=*\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner=*\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_counts_gene) + [ -n "$VIASH_PAR_RSEM_COUNTS_GENE" ] && ViashError Bad arguments for option \'--rsem_counts_gene\': \'$VIASH_PAR_RSEM_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_counts_gene=*) + [ -n "$VIASH_PAR_RSEM_COUNTS_GENE" ] && ViashError Bad arguments for option \'--rsem_counts_gene=*\': \'$VIASH_PAR_RSEM_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_counts_transcripts) + [ -n "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--rsem_counts_transcripts\': \'$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_counts_transcripts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_counts_transcripts=*) + [ -n "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--rsem_counts_transcripts=*\': \'$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_qc) + [ -n "$VIASH_PAR_SKIP_QC" ] && ViashError Bad arguments for option \'--skip_qc\': \'$VIASH_PAR_SKIP_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QC=true + shift 1 + ;; + --skip_biotype_qc) + [ -n "$VIASH_PAR_SKIP_BIOTYPE_QC" ] && ViashError Bad arguments for option \'--skip_biotype_qc\': \'$VIASH_PAR_SKIP_BIOTYPE_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BIOTYPE_QC=true + shift 1 + ;; + --skip_align) + [ -n "$VIASH_PAR_SKIP_ALIGN" ] && ViashError Bad arguments for option \'--skip_align\': \'$VIASH_PAR_SKIP_ALIGN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_ALIGN=true + shift 1 + ;; + --skip_pseudo_align) + [ -n "$VIASH_PAR_SKIP_PSEUDO_ALIGN" ] && ViashError Bad arguments for option \'--skip_pseudo_align\': \'$VIASH_PAR_SKIP_PSEUDO_ALIGN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_PSEUDO_ALIGN=true + shift 1 + ;; + --skip_dupradar) + [ -n "$VIASH_PAR_SKIP_DUPRADAR" ] && ViashError Bad arguments for option \'--skip_dupradar\': \'$VIASH_PAR_SKIP_DUPRADAR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_DUPRADAR=true + shift 1 + ;; + --skip_qualimap) + [ -n "$VIASH_PAR_SKIP_QUALIMAP" ] && ViashError Bad arguments for option \'--skip_qualimap\': \'$VIASH_PAR_SKIP_QUALIMAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QUALIMAP=true + shift 1 + ;; + --skip_rseqc) + [ -n "$VIASH_PAR_SKIP_RSEQC" ] && ViashError Bad arguments for option \'--skip_rseqc\': \'$VIASH_PAR_SKIP_RSEQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_RSEQC=true + shift 1 + ;; + --skip_multiqc) + [ -n "$VIASH_PAR_SKIP_MULTIQC" ] && ViashError Bad arguments for option \'--skip_multiqc\': \'$VIASH_PAR_SKIP_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_MULTIQC=true + shift 1 + ;; + --skip_preseq) + [ -n "$VIASH_PAR_SKIP_PRESEQ" ] && ViashError Bad arguments for option \'--skip_preseq\': \'$VIASH_PAR_SKIP_PRESEQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_PRESEQ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_preseq. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_preseq=*) + [ -n "$VIASH_PAR_SKIP_PRESEQ" ] && ViashError Bad arguments for option \'--skip_preseq=*\': \'$VIASH_PAR_SKIP_PRESEQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_PRESEQ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_preseq_args) + [ -n "$VIASH_PAR_EXTRA_PRESEQ_ARGS" ] && ViashError Bad arguments for option \'--extra_preseq_args\': \'$VIASH_PAR_EXTRA_PRESEQ_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PRESEQ_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_preseq_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_preseq_args=*) + [ -n "$VIASH_PAR_EXTRA_PRESEQ_ARGS" ] && ViashError Bad arguments for option \'--extra_preseq_args=*\': \'$VIASH_PAR_EXTRA_PRESEQ_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PRESEQ_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_group_type) + [ -n "$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_group_type\': \'$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_GROUP_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_group_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_group_type=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_group_type=*\': \'$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_GROUP_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_feature_type) + [ -n "$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_feature_type\': \'$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_feature_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_feature_type=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_feature_type=*\': \'$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gencode) + [ -n "$VIASH_PAR_GENCODE" ] && ViashError Bad arguments for option \'--gencode\': \'$VIASH_PAR_GENCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENCODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gencode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gencode=*) + [ -n "$VIASH_PAR_GENCODE" ] && ViashError Bad arguments for option \'--gencode=*\': \'$VIASH_PAR_GENCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENCODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --biotypes_header) + [ -n "$VIASH_PAR_BIOTYPES_HEADER" ] && ViashError Bad arguments for option \'--biotypes_header\': \'$VIASH_PAR_BIOTYPES_HEADER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPES_HEADER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --biotypes_header. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --biotypes_header=*) + [ -n "$VIASH_PAR_BIOTYPES_HEADER" ] && ViashError Bad arguments for option \'--biotypes_header=*\': \'$VIASH_PAR_BIOTYPES_HEADER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPES_HEADER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --biotype) + [ -n "$VIASH_PAR_BIOTYPE" ] && ViashError Bad arguments for option \'--biotype\': \'$VIASH_PAR_BIOTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --biotype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --biotype=*) + [ -n "$VIASH_PAR_BIOTYPE" ] && ViashError Bad arguments for option \'--biotype=*\': \'$VIASH_PAR_BIOTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIOTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rseqc_modules) + if [ -z "$VIASH_PAR_RSEQC_MODULES" ]; then + VIASH_PAR_RSEQC_MODULES="$2" + else + VIASH_PAR_RSEQC_MODULES="$VIASH_PAR_RSEQC_MODULES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rseqc_modules. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rseqc_modules=*) + if [ -z "$VIASH_PAR_RSEQC_MODULES" ]; then + VIASH_PAR_RSEQC_MODULES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_RSEQC_MODULES="$VIASH_PAR_RSEQC_MODULES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sample_size) + [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_size=*) + [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size=*\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lower_bound_size) + [ -n "$VIASH_PAR_LOWER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--lower_bound_size\': \'$VIASH_PAR_LOWER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOWER_BOUND_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lower_bound_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lower_bound_size=*) + [ -n "$VIASH_PAR_LOWER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--lower_bound_size=*\': \'$VIASH_PAR_LOWER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOWER_BOUND_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --upper_bound_size) + [ -n "$VIASH_PAR_UPPER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--upper_bound_size\': \'$VIASH_PAR_UPPER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UPPER_BOUND_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --upper_bound_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --upper_bound_size=*) + [ -n "$VIASH_PAR_UPPER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--upper_bound_size=*\': \'$VIASH_PAR_UPPER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UPPER_BOUND_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --step_size) + [ -n "$VIASH_PAR_STEP_SIZE" ] && ViashError Bad arguments for option \'--step_size\': \'$VIASH_PAR_STEP_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STEP_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --step_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --step_size=*) + [ -n "$VIASH_PAR_STEP_SIZE" ] && ViashError Bad arguments for option \'--step_size=*\': \'$VIASH_PAR_STEP_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STEP_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --map_qual) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --map_qual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --map_qual=*) + [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual=*\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAP_QUAL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_intron) + [ -n "$VIASH_PAR_MIN_INTRON" ] && ViashError Bad arguments for option \'--min_intron\': \'$VIASH_PAR_MIN_INTRON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_INTRON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_intron. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_intron=*) + [ -n "$VIASH_PAR_MIN_INTRON" ] && ViashError Bad arguments for option \'--min_intron=*\': \'$VIASH_PAR_MIN_INTRON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_INTRON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_splice_read) + [ -n "$VIASH_PAR_MIN_SPLICE_READ" ] && ViashError Bad arguments for option \'--min_splice_read\': \'$VIASH_PAR_MIN_SPLICE_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLICE_READ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_splice_read. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_splice_read=*) + [ -n "$VIASH_PAR_MIN_SPLICE_READ" ] && ViashError Bad arguments for option \'--min_splice_read=*\': \'$VIASH_PAR_MIN_SPLICE_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLICE_READ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_percentile_lower_bound) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_lower_bound\': \'$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_percentile_lower_bound. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_percentile_lower_bound=*) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_lower_bound=*\': \'$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_percentile_upper_bound) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_upper_bound\': \'$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_percentile_upper_bound. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_percentile_upper_bound=*) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" ] && ViashError Bad arguments for option \'--sampling_percentile_upper_bound=*\': \'$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_percentile_step) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" ] && ViashError Bad arguments for option \'--sampling_percentile_step\': \'$VIASH_PAR_SAMPLING_PERCENTILE_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_STEP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_percentile_step. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_percentile_step=*) + [ -n "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" ] && ViashError Bad arguments for option \'--sampling_percentile_step=*\': \'$VIASH_PAR_SAMPLING_PERCENTILE_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_PERCENTILE_STEP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_count_upper_limit) + [ -n "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" ] && ViashError Bad arguments for option \'--read_count_upper_limit\': \'$VIASH_PAR_READ_COUNT_UPPER_LIMIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_COUNT_UPPER_LIMIT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_count_upper_limit. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_count_upper_limit=*) + [ -n "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" ] && ViashError Bad arguments for option \'--read_count_upper_limit=*\': \'$VIASH_PAR_READ_COUNT_UPPER_LIMIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_COUNT_UPPER_LIMIT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --minimum_coverage) + [ -n "$VIASH_PAR_MINIMUM_COVERAGE" ] && ViashError Bad arguments for option \'--minimum_coverage\': \'$VIASH_PAR_MINIMUM_COVERAGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_COVERAGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimum_coverage. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minimum_coverage=*) + [ -n "$VIASH_PAR_MINIMUM_COVERAGE" ] && ViashError Bad arguments for option \'--minimum_coverage=*\': \'$VIASH_PAR_MINIMUM_COVERAGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_COVERAGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tin_sample_size) + [ -n "$VIASH_PAR_TIN_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--tin_sample_size\': \'$VIASH_PAR_TIN_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_SAMPLE_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tin_sample_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tin_sample_size=*) + [ -n "$VIASH_PAR_TIN_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--tin_sample_size=*\': \'$VIASH_PAR_TIN_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_SAMPLE_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --subtract_background) + [ -n "$VIASH_PAR_SUBTRACT_BACKGROUND" ] && ViashError Bad arguments for option \'--subtract_background\': \'$VIASH_PAR_SUBTRACT_BACKGROUND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBTRACT_BACKGROUND=true + shift 1 + ;; + --pr_bases) + [ -n "$VIASH_PAR_PR_BASES" ] && ViashError Bad arguments for option \'--pr_bases\': \'$VIASH_PAR_PR_BASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PR_BASES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pr_bases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pr_bases=*) + [ -n "$VIASH_PAR_PR_BASES" ] && ViashError Bad arguments for option \'--pr_bases=*\': \'$VIASH_PAR_PR_BASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PR_BASES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tr_bias) + [ -n "$VIASH_PAR_TR_BIAS" ] && ViashError Bad arguments for option \'--tr_bias\': \'$VIASH_PAR_TR_BIAS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TR_BIAS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tr_bias. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tr_bias=*) + [ -n "$VIASH_PAR_TR_BIAS" ] && ViashError Bad arguments for option \'--tr_bias=*\': \'$VIASH_PAR_TR_BIAS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TR_BIAS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --algorithm) + [ -n "$VIASH_PAR_ALGORITHM" ] && ViashError Bad arguments for option \'--algorithm\': \'$VIASH_PAR_ALGORITHM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALGORITHM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --algorithm. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --algorithm=*) + [ -n "$VIASH_PAR_ALGORITHM" ] && ViashError Bad arguments for option \'--algorithm=*\': \'$VIASH_PAR_ALGORITHM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALGORITHM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sequencing_protocol) + [ -n "$VIASH_PAR_SEQUENCING_PROTOCOL" ] && ViashError Bad arguments for option \'--sequencing_protocol\': \'$VIASH_PAR_SEQUENCING_PROTOCOL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEQUENCING_PROTOCOL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sequencing_protocol. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sequencing_protocol=*) + [ -n "$VIASH_PAR_SEQUENCING_PROTOCOL" ] && ViashError Bad arguments for option \'--sequencing_protocol=*\': \'$VIASH_PAR_SEQUENCING_PROTOCOL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEQUENCING_PROTOCOL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sorted) + [ -n "$VIASH_PAR_SORTED" ] && ViashError Bad arguments for option \'--sorted\': \'$VIASH_PAR_SORTED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTED=true + shift 1 + ;; + --java_memory_size) + [ -n "$VIASH_PAR_JAVA_MEMORY_SIZE" ] && ViashError Bad arguments for option \'--java_memory_size\': \'$VIASH_PAR_JAVA_MEMORY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JAVA_MEMORY_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --java_memory_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --java_memory_size=*) + [ -n "$VIASH_PAR_JAVA_MEMORY_SIZE" ] && ViashError Bad arguments for option \'--java_memory_size=*\': \'$VIASH_PAR_JAVA_MEMORY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JAVA_MEMORY_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_deseq2_qc) + [ -n "$VIASH_PAR_SKIP_DESEQ2_QC" ] && ViashError Bad arguments for option \'--skip_deseq2_qc\': \'$VIASH_PAR_SKIP_DESEQ2_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_DESEQ2_QC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_deseq2_qc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_deseq2_qc=*) + [ -n "$VIASH_PAR_SKIP_DESEQ2_QC" ] && ViashError Bad arguments for option \'--skip_deseq2_qc=*\': \'$VIASH_PAR_SKIP_DESEQ2_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_DESEQ2_QC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --deseq2_vst) + [ -n "$VIASH_PAR_DESEQ2_VST" ] && ViashError Bad arguments for option \'--deseq2_vst\': \'$VIASH_PAR_DESEQ2_VST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_VST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --deseq2_vst. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --deseq2_vst=*) + [ -n "$VIASH_PAR_DESEQ2_VST" ] && ViashError Bad arguments for option \'--deseq2_vst=*\': \'$VIASH_PAR_DESEQ2_VST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_VST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_custom_config) + [ -n "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ] && ViashError Bad arguments for option \'--multiqc_custom_config\': \'$VIASH_PAR_MULTIQC_CUSTOM_CONFIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_CUSTOM_CONFIG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_custom_config. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_custom_config=*) + [ -n "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ] && ViashError Bad arguments for option \'--multiqc_custom_config=*\': \'$VIASH_PAR_MULTIQC_CUSTOM_CONFIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_CUSTOM_CONFIG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_title) + [ -n "$VIASH_PAR_MULTIQC_TITLE" ] && ViashError Bad arguments for option \'--multiqc_title\': \'$VIASH_PAR_MULTIQC_TITLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_TITLE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_title. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_title=*) + [ -n "$VIASH_PAR_MULTIQC_TITLE" ] && ViashError Bad arguments for option \'--multiqc_title=*\': \'$VIASH_PAR_MULTIQC_TITLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_TITLE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_methods_description) + [ -n "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ] && ViashError Bad arguments for option \'--multiqc_methods_description\': \'$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_METHODS_DESCRIPTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_methods_description. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_methods_description=*) + [ -n "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ] && ViashError Bad arguments for option \'--multiqc_methods_description=*\': \'$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_METHODS_DESCRIPTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --passed_trimmed_reads) + [ -n "$VIASH_PAR_PASSED_TRIMMED_READS" ] && ViashError Bad arguments for option \'--passed_trimmed_reads\': \'$VIASH_PAR_PASSED_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PASSED_TRIMMED_READS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --passed_trimmed_reads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --passed_trimmed_reads=*) + [ -n "$VIASH_PAR_PASSED_TRIMMED_READS" ] && ViashError Bad arguments for option \'--passed_trimmed_reads=*\': \'$VIASH_PAR_PASSED_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PASSED_TRIMMED_READS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --num_trimmed_reads) + [ -n "$VIASH_PAR_NUM_TRIMMED_READS" ] && ViashError Bad arguments for option \'--num_trimmed_reads\': \'$VIASH_PAR_NUM_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_TRIMMED_READS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_trimmed_reads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --num_trimmed_reads=*) + [ -n "$VIASH_PAR_NUM_TRIMMED_READS" ] && ViashError Bad arguments for option \'--num_trimmed_reads=*\': \'$VIASH_PAR_NUM_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_TRIMMED_READS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --passed_mapping) + [ -n "$VIASH_PAR_PASSED_MAPPING" ] && ViashError Bad arguments for option \'--passed_mapping\': \'$VIASH_PAR_PASSED_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PASSED_MAPPING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --passed_mapping. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --passed_mapping=*) + [ -n "$VIASH_PAR_PASSED_MAPPING" ] && ViashError Bad arguments for option \'--passed_mapping=*\': \'$VIASH_PAR_PASSED_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PASSED_MAPPING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --percent_mapped) + [ -n "$VIASH_PAR_PERCENT_MAPPED" ] && ViashError Bad arguments for option \'--percent_mapped\': \'$VIASH_PAR_PERCENT_MAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PERCENT_MAPPED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --percent_mapped. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --percent_mapped=*) + [ -n "$VIASH_PAR_PERCENT_MAPPED" ] && ViashError Bad arguments for option \'--percent_mapped=*\': \'$VIASH_PAR_PERCENT_MAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PERCENT_MAPPED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_zip_1) + [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_zip_1=*) + [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1=*\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_zip_2) + [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_zip_2=*) + [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2=*\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_zip_1) + [ -n "$VIASH_PAR_TRIM_ZIP_1" ] && ViashError Bad arguments for option \'--trim_zip_1\': \'$VIASH_PAR_TRIM_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_zip_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_zip_1=*) + [ -n "$VIASH_PAR_TRIM_ZIP_1" ] && ViashError Bad arguments for option \'--trim_zip_1=*\': \'$VIASH_PAR_TRIM_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_zip_2) + [ -n "$VIASH_PAR_TRIM_ZIP_2" ] && ViashError Bad arguments for option \'--trim_zip_2\': \'$VIASH_PAR_TRIM_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_zip_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_zip_2=*) + [ -n "$VIASH_PAR_TRIM_ZIP_2" ] && ViashError Bad arguments for option \'--trim_zip_2=*\': \'$VIASH_PAR_TRIM_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_log_1) + [ -n "$VIASH_PAR_TRIM_LOG_1" ] && ViashError Bad arguments for option \'--trim_log_1\': \'$VIASH_PAR_TRIM_LOG_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_log_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_log_1=*) + [ -n "$VIASH_PAR_TRIM_LOG_1" ] && ViashError Bad arguments for option \'--trim_log_1=*\': \'$VIASH_PAR_TRIM_LOG_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_log_2) + [ -n "$VIASH_PAR_TRIM_LOG_2" ] && ViashError Bad arguments for option \'--trim_log_2\': \'$VIASH_PAR_TRIM_LOG_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_log_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_log_2=*) + [ -n "$VIASH_PAR_TRIM_LOG_2" ] && ViashError Bad arguments for option \'--trim_log_2=*\': \'$VIASH_PAR_TRIM_LOG_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sortmerna_multiqc) + [ -n "$VIASH_PAR_SORTMERNA_MULTIQC" ] && ViashError Bad arguments for option \'--sortmerna_multiqc\': \'$VIASH_PAR_SORTMERNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sortmerna_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sortmerna_multiqc=*) + [ -n "$VIASH_PAR_SORTMERNA_MULTIQC" ] && ViashError Bad arguments for option \'--sortmerna_multiqc=*\': \'$VIASH_PAR_SORTMERNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_multiqc) + [ -n "$VIASH_PAR_STAR_MULTIQC" ] && ViashError Bad arguments for option \'--star_multiqc\': \'$VIASH_PAR_STAR_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_multiqc=*) + [ -n "$VIASH_PAR_STAR_MULTIQC" ] && ViashError Bad arguments for option \'--star_multiqc=*\': \'$VIASH_PAR_STAR_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_multiqc) + [ -n "$VIASH_PAR_RSEM_MULTIQC" ] && ViashError Bad arguments for option \'--rsem_multiqc\': \'$VIASH_PAR_RSEM_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_multiqc=*) + [ -n "$VIASH_PAR_RSEM_MULTIQC" ] && ViashError Bad arguments for option \'--rsem_multiqc=*\': \'$VIASH_PAR_RSEM_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_stats) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_stats=*) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats=*\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_flagstat) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_flagstat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_flagstat=*) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat=*\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_idxstats) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_idxstats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_idxstats=*) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats=*\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --markduplicates_multiqc) + [ -n "$VIASH_PAR_MARKDUPLICATES_MULTIQC" ] && ViashError Bad arguments for option \'--markduplicates_multiqc\': \'$VIASH_PAR_MARKDUPLICATES_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MARKDUPLICATES_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --markduplicates_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --markduplicates_multiqc=*) + [ -n "$VIASH_PAR_MARKDUPLICATES_MULTIQC" ] && ViashError Bad arguments for option \'--markduplicates_multiqc=*\': \'$VIASH_PAR_MARKDUPLICATES_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MARKDUPLICATES_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_multiqc) + [ -n "$VIASH_PAR_PSEUDO_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_multiqc\': \'$VIASH_PAR_PSEUDO_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_multiqc=*) + [ -n "$VIASH_PAR_PSEUDO_MULTIQC" ] && ViashError Bad arguments for option \'--pseudo_multiqc=*\': \'$VIASH_PAR_PSEUDO_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --preseq_output) + [ -n "$VIASH_PAR_PRESEQ_OUTPUT" ] && ViashError Bad arguments for option \'--preseq_output\': \'$VIASH_PAR_PRESEQ_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PRESEQ_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --preseq_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --preseq_output=*) + [ -n "$VIASH_PAR_PRESEQ_OUTPUT" ] && ViashError Bad arguments for option \'--preseq_output=*\': \'$VIASH_PAR_PRESEQ_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PRESEQ_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamstat_output) + [ -n "$VIASH_PAR_BAMSTAT_OUTPUT" ] && ViashError Bad arguments for option \'--bamstat_output\': \'$VIASH_PAR_BAMSTAT_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMSTAT_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamstat_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bamstat_output=*) + [ -n "$VIASH_PAR_BAMSTAT_OUTPUT" ] && ViashError Bad arguments for option \'--bamstat_output=*\': \'$VIASH_PAR_BAMSTAT_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMSTAT_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness_output) + [ -n "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && ViashError Bad arguments for option \'--strandedness_output\': \'$VIASH_PAR_STRANDEDNESS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness_output=*) + [ -n "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && ViashError Bad arguments for option \'--strandedness_output=*\': \'$VIASH_PAR_STRANDEDNESS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_stats) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--inner_dist_output_stats\': \'$VIASH_PAR_INNER_DIST_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_stats=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--inner_dist_output_stats=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_dist) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_DIST" ] && ViashError Bad arguments for option \'--inner_dist_output_dist\': \'$VIASH_PAR_INNER_DIST_OUTPUT_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_DIST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_dist. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_dist=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_DIST" ] && ViashError Bad arguments for option \'--inner_dist_output_dist=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_DIST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_freq) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ" ] && ViashError Bad arguments for option \'--inner_dist_output_freq\': \'$VIASH_PAR_INNER_DIST_OUTPUT_FREQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_FREQ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_freq. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_freq=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ" ] && ViashError Bad arguments for option \'--inner_dist_output_freq=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_FREQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_FREQ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_plot) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--inner_dist_output_plot\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_plot=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--inner_dist_output_plot=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_plot_r) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--inner_dist_output_plot_r\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_plot_r=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--inner_dist_output_plot_r=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_log) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && ViashError Bad arguments for option \'--junction_annotation_output_log\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_log. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_log=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && ViashError Bad arguments for option \'--junction_annotation_output_log=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_plot_r) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_annotation_output_plot_r\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_plot_r=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_annotation_output_plot_r=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_junction_bed) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_bed\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_junction_bed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_junction_bed=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_bed=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_junction_interact) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_interact\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_junction_interact. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_junction_interact=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_interact=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_junction_sheet) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_sheet\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_junction_sheet. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_junction_sheet=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_sheet=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_splice_events_plot) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_events_plot\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_splice_events_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_splice_events_plot=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_events_plot=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_splice_junctions_plot) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_junctions_plot\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_splice_junctions_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_splice_junctions_plot=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_junctions_plot=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_saturation_output_plot_r) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot_r\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_saturation_output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_saturation_output_plot_r=*) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot_r=*\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_saturation_output_plot) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_saturation_output_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_saturation_output_plot=*) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot=*\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_distribution_output) + [ -n "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && ViashError Bad arguments for option \'--read_distribution_output\': \'$VIASH_PAR_READ_DISTRIBUTION_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DISTRIBUTION_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_distribution_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_distribution_output=*) + [ -n "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && ViashError Bad arguments for option \'--read_distribution_output=*\': \'$VIASH_PAR_READ_DISTRIBUTION_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DISTRIBUTION_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_plot_r) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot_r\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_plot_r=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot_r=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_plot) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_plot=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_mapping) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_mapping\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_mapping. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_mapping=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_mapping=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_sequence) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_sequence\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_sequence. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_sequence=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_sequence=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tin_output_summary) + [ -n "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && ViashError Bad arguments for option \'--tin_output_summary\': \'$VIASH_PAR_TIN_OUTPUT_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tin_output_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tin_output_summary=*) + [ -n "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && ViashError Bad arguments for option \'--tin_output_summary=*\': \'$VIASH_PAR_TIN_OUTPUT_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tin_output_metrics) + [ -n "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && ViashError Bad arguments for option \'--tin_output_metrics\': \'$VIASH_PAR_TIN_OUTPUT_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_METRICS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tin_output_metrics. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tin_output_metrics=*) + [ -n "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && ViashError Bad arguments for option \'--tin_output_metrics=*\': \'$VIASH_PAR_TIN_OUTPUT_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_METRICS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_dupmatrix) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && ViashError Bad arguments for option \'--dupradar_output_dupmatrix\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_dupmatrix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_dupmatrix=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && ViashError Bad arguments for option \'--dupradar_output_dupmatrix=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_dup_intercept_mqc) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_dup_intercept_mqc\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_dup_intercept_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_dup_intercept_mqc=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_dup_intercept_mqc=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_duprate_exp_boxplot) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_boxplot\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_duprate_exp_boxplot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_duprate_exp_boxplot=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_boxplot=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_duprate_exp_densplot) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_densplot\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_duprate_exp_densplot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_duprate_exp_densplot=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_densplot=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_duprate_exp_denscurve_mqc) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_denscurve_mqc\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_duprate_exp_denscurve_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_duprate_exp_denscurve_mqc=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_denscurve_mqc=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_expression_histogram) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && ViashError Bad arguments for option \'--dupradar_output_expression_histogram\': \'$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_expression_histogram. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_expression_histogram=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && ViashError Bad arguments for option \'--dupradar_output_expression_histogram=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_intercept_slope) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && ViashError Bad arguments for option \'--dupradar_output_intercept_slope\': \'$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_intercept_slope. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_intercept_slope=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && ViashError Bad arguments for option \'--dupradar_output_intercept_slope=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_qc_report) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_qc_report. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_qc_report=*) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report=*\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_counts) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_counts=*) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts=*\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_report) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_report. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_report=*) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report=*\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_REPORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --deseq2_output) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT" ] && ViashError Bad arguments for option \'--deseq2_output\': \'$VIASH_PAR_DESEQ2_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --deseq2_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --deseq2_output=*) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT" ] && ViashError Bad arguments for option \'--deseq2_output=*\': \'$VIASH_PAR_DESEQ2_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --deseq2_output_pseudo) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && ViashError Bad arguments for option \'--deseq2_output_pseudo\': \'$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT_PSEUDO="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --deseq2_output_pseudo. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --deseq2_output_pseudo=*) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && ViashError Bad arguments for option \'--deseq2_output_pseudo=*\': \'$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT_PSEUDO=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_report) + [ -n "$VIASH_PAR_MULTIQC_REPORT" ] && ViashError Bad arguments for option \'--multiqc_report\': \'$VIASH_PAR_MULTIQC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_report. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_report=*) + [ -n "$VIASH_PAR_MULTIQC_REPORT" ] && ViashError Bad arguments for option \'--multiqc_report=*\': \'$VIASH_PAR_MULTIQC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_REPORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_data) + [ -n "$VIASH_PAR_MULTIQC_DATA" ] && ViashError Bad arguments for option \'--multiqc_data\': \'$VIASH_PAR_MULTIQC_DATA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_DATA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_data. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_data=*) + [ -n "$VIASH_PAR_MULTIQC_DATA" ] && ViashError Bad arguments for option \'--multiqc_data=*\': \'$VIASH_PAR_MULTIQC_DATA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_DATA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_plots) + [ -n "$VIASH_PAR_MULTIQC_PLOTS" ] && ViashError Bad arguments for option \'--multiqc_plots\': \'$VIASH_PAR_MULTIQC_PLOTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_PLOTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_plots. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_plots=*) + [ -n "$VIASH_PAR_MULTIQC_PLOTS" ] && ViashError Bad arguments for option \'--multiqc_plots=*\': \'$VIASH_PAR_MULTIQC_PLOTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_PLOTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts) + [ -n "$VIASH_PAR_FEATURECOUNTS" ] && ViashError Bad arguments for option \'--featurecounts\': \'$VIASH_PAR_FEATURECOUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts=*) + [ -n "$VIASH_PAR_FEATURECOUNTS" ] && ViashError Bad arguments for option \'--featurecounts=*\': \'$VIASH_PAR_FEATURECOUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_summary) + [ -n "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ] && ViashError Bad arguments for option \'--featurecounts_summary\': \'$VIASH_PAR_FEATURECOUNTS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_summary=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ] && ViashError Bad arguments for option \'--featurecounts_summary=*\': \'$VIASH_PAR_FEATURECOUNTS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_multiqc) + [ -n "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_multiqc\': \'$VIASH_PAR_FEATURECOUNTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_multiqc=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_multiqc=*\': \'$VIASH_PAR_FEATURECOUNTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_rrna_multiqc) + [ -n "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_rrna_multiqc\': \'$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_rrna_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_rrna_multiqc=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_rrna_multiqc=*\': \'$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_gene) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_gene=*) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene=*\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene=*) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_length_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_length_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_transcript) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_transcript=*) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript=*\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_transcript) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_transcript=*) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_merged_summarizedexperiment) + [ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_merged_summarizedexperiment. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_merged_summarizedexperiment=*) + [ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment=*\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_tpm_gene) + [ -n "$VIASH_PAR_PSEUDO_TPM_GENE" ] && ViashError Bad arguments for option \'--pseudo_tpm_gene\': \'$VIASH_PAR_PSEUDO_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_tpm_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_tpm_gene=*) + [ -n "$VIASH_PAR_PSEUDO_TPM_GENE" ] && ViashError Bad arguments for option \'--pseudo_tpm_gene=*\': \'$VIASH_PAR_PSEUDO_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_gene) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && ViashError Bad arguments for option \'--pseudo_counts_gene\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_gene=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && ViashError Bad arguments for option \'--pseudo_counts_gene=*\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_gene_length_scaled) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_length_scaled\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_gene_length_scaled=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_length_scaled=*\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_gene_scaled) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_scaled\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_gene_scaled=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_scaled=*\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_tpm_transcript) + [ -n "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_tpm_transcript\': \'$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_tpm_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_tpm_transcript=*) + [ -n "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_tpm_transcript=*\': \'$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_transcript) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_counts_transcript\': \'$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_transcript=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_counts_transcript=*\': \'$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_quant_merged_summarizedexperiment) + [ -n "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--pseudo_quant_merged_summarizedexperiment\': \'$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_quant_merged_summarizedexperiment. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_quant_merged_summarizedexperiment=*) + [ -n "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--pseudo_quant_merged_summarizedexperiment=*\': \'$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_ID+x} ]; then + ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_STRANDEDNESS+x} ]; then + VIASH_PAR_STRANDEDNESS="unstranded" +fi +if [ -z ${VIASH_PAR_GTF_GROUP_FEATURES+x} ]; then + VIASH_PAR_GTF_GROUP_FEATURES="gene_id" +fi +if [ -z ${VIASH_PAR_GTF_EXTRA_ATTRIBUTES+x} ]; then + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="gene_name" +fi +if [ -z ${VIASH_PAR_SKIP_QC+x} ]; then + VIASH_PAR_SKIP_QC="false" +fi +if [ -z ${VIASH_PAR_SKIP_BIOTYPE_QC+x} ]; then + VIASH_PAR_SKIP_BIOTYPE_QC="false" +fi +if [ -z ${VIASH_PAR_SKIP_ALIGN+x} ]; then + VIASH_PAR_SKIP_ALIGN="false" +fi +if [ -z ${VIASH_PAR_SKIP_PSEUDO_ALIGN+x} ]; then + VIASH_PAR_SKIP_PSEUDO_ALIGN="false" +fi +if [ -z ${VIASH_PAR_SKIP_DUPRADAR+x} ]; then + VIASH_PAR_SKIP_DUPRADAR="false" +fi +if [ -z ${VIASH_PAR_SKIP_QUALIMAP+x} ]; then + VIASH_PAR_SKIP_QUALIMAP="false" +fi +if [ -z ${VIASH_PAR_SKIP_RSEQC+x} ]; then + VIASH_PAR_SKIP_RSEQC="false" +fi +if [ -z ${VIASH_PAR_SKIP_MULTIQC+x} ]; then + VIASH_PAR_SKIP_MULTIQC="false" +fi +if [ -z ${VIASH_PAR_SKIP_PRESEQ+x} ]; then + VIASH_PAR_SKIP_PRESEQ="false" +fi +if [ -z ${VIASH_PAR_EXTRA_PRESEQ_ARGS+x} ]; then + VIASH_PAR_EXTRA_PRESEQ_ARGS="-verbose -bam -seed 1" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_GROUP_TYPE+x} ]; then + VIASH_PAR_FEATURECOUNTS_GROUP_TYPE="gene_biotype" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE+x} ]; then + VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE="exon" +fi +if [ -z ${VIASH_PAR_BIOTYPES_HEADER+x} ]; then + VIASH_PAR_BIOTYPES_HEADER="src/assets/multiqc/biotypes_header.txt" +fi +if [ -z ${VIASH_PAR_RSEQC_MODULES+x} ]; then + VIASH_PAR_RSEQC_MODULES="bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" +fi +if [ -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then + VIASH_PAR_SAMPLE_SIZE="200000" +fi +if [ -z ${VIASH_PAR_LOWER_BOUND_SIZE+x} ]; then + VIASH_PAR_LOWER_BOUND_SIZE="-250" +fi +if [ -z ${VIASH_PAR_UPPER_BOUND_SIZE+x} ]; then + VIASH_PAR_UPPER_BOUND_SIZE="250" +fi +if [ -z ${VIASH_PAR_STEP_SIZE+x} ]; then + VIASH_PAR_STEP_SIZE="5" +fi +if [ -z ${VIASH_PAR_MAP_QUAL+x} ]; then + VIASH_PAR_MAP_QUAL="30" +fi +if [ -z ${VIASH_PAR_MIN_INTRON+x} ]; then + VIASH_PAR_MIN_INTRON="50" +fi +if [ -z ${VIASH_PAR_MIN_SPLICE_READ+x} ]; then + VIASH_PAR_MIN_SPLICE_READ="1" +fi +if [ -z ${VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND+x} ]; then + VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND="5" +fi +if [ -z ${VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND+x} ]; then + VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND="100" +fi +if [ -z ${VIASH_PAR_SAMPLING_PERCENTILE_STEP+x} ]; then + VIASH_PAR_SAMPLING_PERCENTILE_STEP="5" +fi +if [ -z ${VIASH_PAR_READ_COUNT_UPPER_LIMIT+x} ]; then + VIASH_PAR_READ_COUNT_UPPER_LIMIT="500" +fi +if [ -z ${VIASH_PAR_MINIMUM_COVERAGE+x} ]; then + VIASH_PAR_MINIMUM_COVERAGE="10" +fi +if [ -z ${VIASH_PAR_TIN_SAMPLE_SIZE+x} ]; then + VIASH_PAR_TIN_SAMPLE_SIZE="100" +fi +if [ -z ${VIASH_PAR_SUBTRACT_BACKGROUND+x} ]; then + VIASH_PAR_SUBTRACT_BACKGROUND="false" +fi +if [ -z ${VIASH_PAR_PR_BASES+x} ]; then + VIASH_PAR_PR_BASES="100" +fi +if [ -z ${VIASH_PAR_TR_BIAS+x} ]; then + VIASH_PAR_TR_BIAS="1000" +fi +if [ -z ${VIASH_PAR_ALGORITHM+x} ]; then + VIASH_PAR_ALGORITHM="uniquely-mapped-reads" +fi +if [ -z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then + VIASH_PAR_SEQUENCING_PROTOCOL="non-strand-specific" +fi +if [ -z ${VIASH_PAR_SORTED+x} ]; then + VIASH_PAR_SORTED="false" +fi +if [ -z ${VIASH_PAR_JAVA_MEMORY_SIZE+x} ]; then + VIASH_PAR_JAVA_MEMORY_SIZE="4G" +fi +if [ -z ${VIASH_PAR_PRESEQ_OUTPUT+x} ]; then + VIASH_PAR_PRESEQ_OUTPUT="\$id.lc_extrap.txt" +fi +if [ -z ${VIASH_PAR_BAMSTAT_OUTPUT+x} ]; then + VIASH_PAR_BAMSTAT_OUTPUT="\$id.mapping_quality.txt" +fi +if [ -z ${VIASH_PAR_STRANDEDNESS_OUTPUT+x} ]; then + VIASH_PAR_STRANDEDNESS_OUTPUT="\$id.strandedness.txt" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_STATS+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_STATS="\$id.inner_distance.stats" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_DIST+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_DIST="\$id.inner_distance.txt" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_FREQ+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_FREQ="\$id.inner_distance_freq.txt" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_PLOT+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_PLOT="\$id.inner_distance_plot.pdf" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R="\$id.inner_distance_plot.r" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG="\$id.junction_annotation.log" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R="\$id.junction_annotation_plot.r" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED="\$id.junction_annotation.bed" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT="\$id.junction_annotation.Interact.bed" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET="\$id.junction_annotation.xls" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT="\$id.splice_events.pdf" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT="\$id.splice_junctions_plot.pdf" +fi +if [ -z ${VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R="\$id.junction_saturation_plot.r" +fi +if [ -z ${VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT+x} ]; then + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT="\$id.junction_saturation_plot.pdf" +fi +if [ -z ${VIASH_PAR_READ_DISTRIBUTION_OUTPUT+x} ]; then + VIASH_PAR_READ_DISTRIBUTION_OUTPUT="\$id.read_distribution.txt" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R="\$id.duplication_rate_plot.r" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT="\$id.duplication_rate_plot.pdf" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING="\$id.duplication_rate_mapping.xls" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE="\$id.duplication_rate_sequencing.xls" +fi +if [ -z ${VIASH_PAR_TIN_OUTPUT_SUMMARY+x} ]; then + VIASH_PAR_TIN_OUTPUT_SUMMARY="\$id.tin_summary.txt" +fi +if [ -z ${VIASH_PAR_TIN_OUTPUT_METRICS+x} ]; then + VIASH_PAR_TIN_OUTPUT_METRICS="\$id.tin.xls" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX="\$id.dup_matrix.txt" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="\$id.dup_intercept_mqc.txt" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT="\$id.duprate_exp_boxplot.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT="\$id.duprate_exp_densityplot.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="\$id.duprate_exp_density_curve_mqc.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM="\$id.expression_hist.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE="\$id.intercept_slope.txt" +fi +if [ -z ${VIASH_PAR_DESEQ2_OUTPUT+x} ]; then + VIASH_PAR_DESEQ2_OUTPUT="deseq2" +fi +if [ -z ${VIASH_PAR_DESEQ2_OUTPUT_PSEUDO+x} ]; then + VIASH_PAR_DESEQ2_OUTPUT_PSEUDO="deseq2_pseudo" +fi +if [ -z ${VIASH_PAR_MULTIQC_REPORT+x} ]; then + VIASH_PAR_MULTIQC_REPORT="multiqc_report.html" +fi +if [ -z ${VIASH_PAR_MULTIQC_DATA+x} ]; then + VIASH_PAR_MULTIQC_DATA="multiqc_data" +fi +if [ -z ${VIASH_PAR_MULTIQC_PLOTS+x} ]; then + VIASH_PAR_MULTIQC_PLOTS="multiqc_plots" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS+x} ]; then + VIASH_PAR_FEATURECOUNTS="\$id.featureCounts.txt" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_SUMMARY+x} ]; then + VIASH_PAR_FEATURECOUNTS_SUMMARY="\$id.featureCounts.txt.summary" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_MULTIQC+x} ]; then + VIASH_PAR_FEATURECOUNTS_MULTIQC="\$id.featureCounts_mqc.tsv" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC+x} ]; then + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="\$id.featureCounts_rrna_mqc.tsv" +fi +if [ -z ${VIASH_PAR_TPM_GENE+x} ]; then + VIASH_PAR_TPM_GENE="salmon.merged.gene_tpm.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE+x} ]; then + VIASH_PAR_COUNTS_GENE="salmon.merged.gene_counts.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED+x} ]; then + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="salmon.merged.gene_counts_length_scaled.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE_SCALED+x} ]; then + VIASH_PAR_COUNTS_GENE_SCALED="salmon.merged.gene_counts_scaled.tsv" +fi +if [ -z ${VIASH_PAR_TPM_TRANSCRIPT+x} ]; then + VIASH_PAR_TPM_TRANSCRIPT="salmon.merged.transcript_tpm.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_TRANSCRIPT+x} ]; then + VIASH_PAR_COUNTS_TRANSCRIPT="salmon.merged.transcript_counts.tsv" +fi +if [ -z ${VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT+x} ]; then + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT="salmon_merged_summarizedexperiment" +fi +if [ -z ${VIASH_PAR_PSEUDO_TPM_GENE+x} ]; then + VIASH_PAR_PSEUDO_TPM_GENE="pseudo_gene_tpm.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_GENE+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_GENE="pseudo_gene_counts.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED="pseudo_gene_counts_length_scaled.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED="pseudo_gene_counts_scaled.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_TPM_TRANSCRIPT+x} ]; then + VIASH_PAR_PSEUDO_TPM_TRANSCRIPT="pseudo_transcript_tpm.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT="pseudo_transcript_counts.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT+x} ]; then + VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT="pseudo_quant_merged_summarizedexperiment" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_GENOME_BAM" ] && [ ! -e "$VIASH_PAR_GENOME_BAM" ]; then + ViashError "Input file '$VIASH_PAR_GENOME_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_INDEX" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_GENOME_BAM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENE_BED" ] && [ ! -e "$VIASH_PAR_GENE_BED" ]; then + ViashError "Input file '$VIASH_PAR_GENE_BED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_OUT_DIR" ] && [ ! -e "$VIASH_PAR_QUANT_OUT_DIR" ]; then + ViashError "Input file '$VIASH_PAR_QUANT_OUT_DIR' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_QUANT_RESULTS_FILE" ]; then + ViashError "Input file '$VIASH_PAR_QUANT_RESULTS_FILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_QUANT_OUT_DIR" ] && [ ! -e "$VIASH_PAR_PSEUDO_QUANT_OUT_DIR" ]; then + ViashError "Input file '$VIASH_PAR_PSEUDO_QUANT_OUT_DIR' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE" ]; then + ViashError "Input file '$VIASH_PAR_PSEUDO_SALMON_QUANT_RESULTS_FILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE" ]; then + ViashError "Input file '$VIASH_PAR_PSEUDO_KALLISTO_QUANT_RESULTS_FILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_RSEM_COUNTS_GENE" ]; then + ViashError "Input file '$VIASH_PAR_RSEM_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ] && [ ! -e "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ]; then + ViashError "Input file '$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BIOTYPES_HEADER" ] && [ ! -e "$VIASH_PAR_BIOTYPES_HEADER" ]; then + ViashError "Input file '$VIASH_PAR_BIOTYPES_HEADER' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ] && [ ! -e "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ]; then + ViashError "Input file '$VIASH_PAR_MULTIQC_CUSTOM_CONFIG' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ] && [ ! -e "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ]; then + ViashError "Input file '$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_MULTIQC" ] && [ ! -e "$VIASH_PAR_RSEM_MULTIQC" ]; then + ViashError "Input file '$VIASH_PAR_RSEM_MULTIQC' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_PAIRED" ]]; then + if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_QC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_qc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_BIOTYPE_QC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_BIOTYPE_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_biotype_qc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_ALIGN" ]]; then + if ! [[ "$VIASH_PAR_SKIP_ALIGN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_align' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_PSEUDO_ALIGN" ]]; then + if ! [[ "$VIASH_PAR_SKIP_PSEUDO_ALIGN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_pseudo_align' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_DUPRADAR" ]]; then + if ! [[ "$VIASH_PAR_SKIP_DUPRADAR" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_dupradar' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_QUALIMAP" ]]; then + if ! [[ "$VIASH_PAR_SKIP_QUALIMAP" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_qualimap' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_RSEQC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_RSEQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_rseqc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_MULTIQC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_MULTIQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_multiqc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_PRESEQ" ]]; then + if ! [[ "$VIASH_PAR_SKIP_PRESEQ" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_preseq' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_GENCODE" ]]; then + if ! [[ "$VIASH_PAR_GENCODE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--gencode' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAMPLE_SIZE" ]]; then + if ! [[ "$VIASH_PAR_SAMPLE_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sample_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLE_SIZE -lt 1 ]]; then + ViashError '--sample_size' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LOWER_BOUND_SIZE" ]]; then + if ! [[ "$VIASH_PAR_LOWER_BOUND_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--lower_bound_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_UPPER_BOUND_SIZE" ]]; then + if ! [[ "$VIASH_PAR_UPPER_BOUND_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--upper_bound_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_STEP_SIZE" ]]; then + if ! [[ "$VIASH_PAR_STEP_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--step_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAP_QUAL" ]]; then + if ! [[ "$VIASH_PAR_MAP_QUAL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--map_qual' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MAP_QUAL -lt 0 ]]; then + ViashError '--map_qual' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_INTRON" ]]; then + if ! [[ "$VIASH_PAR_MIN_INTRON" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_intron' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MIN_INTRON -lt 1 ]]; then + ViashError '--min_intron' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SPLICE_READ" ]]; then + if ! [[ "$VIASH_PAR_MIN_SPLICE_READ" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_splice_read' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MIN_SPLICE_READ -lt 1 ]]; then + ViashError '--min_splice_read' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" ]]; then + if ! [[ "$VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sampling_percentile_lower_bound' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND -lt 0 ]]; then + ViashError '--sampling_percentile_lower_bound' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND -gt 100 ]]; then + ViashError '--sampling_percentile_lower_bound' has be less than or equal to 100. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" ]]; then + if ! [[ "$VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sampling_percentile_upper_bound' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND -lt 0 ]]; then + ViashError '--sampling_percentile_upper_bound' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND -gt 100 ]]; then + ViashError '--sampling_percentile_upper_bound' has be less than or equal to 100. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" ]]; then + if ! [[ "$VIASH_PAR_SAMPLING_PERCENTILE_STEP" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sampling_percentile_step' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_STEP -lt 0 ]]; then + ViashError '--sampling_percentile_step' has be more than or equal to 0. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_SAMPLING_PERCENTILE_STEP -gt 100 ]]; then + ViashError '--sampling_percentile_step' has be less than or equal to 100. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" ]]; then + if ! [[ "$VIASH_PAR_READ_COUNT_UPPER_LIMIT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--read_count_upper_limit' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_READ_COUNT_UPPER_LIMIT -lt 1 ]]; then + ViashError '--read_count_upper_limit' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MINIMUM_COVERAGE" ]]; then + if ! [[ "$VIASH_PAR_MINIMUM_COVERAGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--minimum_coverage' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_MINIMUM_COVERAGE -lt 1 ]]; then + ViashError '--minimum_coverage' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TIN_SAMPLE_SIZE" ]]; then + if ! [[ "$VIASH_PAR_TIN_SAMPLE_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--tin_sample_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_TIN_SAMPLE_SIZE -lt 1 ]]; then + ViashError '--tin_sample_size' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SUBTRACT_BACKGROUND" ]]; then + if ! [[ "$VIASH_PAR_SUBTRACT_BACKGROUND" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--subtract_background' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PR_BASES" ]]; then + if ! [[ "$VIASH_PAR_PR_BASES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--pr_bases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_PR_BASES -lt 1 ]]; then + ViashError '--pr_bases' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TR_BIAS" ]]; then + if ! [[ "$VIASH_PAR_TR_BIAS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--tr_bias' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_TR_BIAS -lt 1 ]]; then + ViashError '--tr_bias' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SORTED" ]]; then + if ! [[ "$VIASH_PAR_SORTED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--sorted' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_DESEQ2_QC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_DESEQ2_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_deseq2_qc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DESEQ2_VST" ]]; then + if ! [[ "$VIASH_PAR_DESEQ2_VST" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--deseq2_vst' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PASSED_TRIMMED_READS" ]]; then + if ! [[ "$VIASH_PAR_PASSED_TRIMMED_READS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--passed_trimmed_reads' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NUM_TRIMMED_READS" ]]; then + if ! [[ "$VIASH_PAR_NUM_TRIMMED_READS" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--num_trimmed_reads' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PASSED_MAPPING" ]]; then + if ! [[ "$VIASH_PAR_PASSED_MAPPING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--passed_mapping' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PERCENT_MAPPED" ]]; then + if ! [[ "$VIASH_PAR_PERCENT_MAPPED" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--percent_mapped' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_RSEQC_MODULES" ]; then + VIASH_PAR_RSEQC_MODULES_CHOICES=("bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication;tin") + IFS=';' + set -f + for val in $VIASH_PAR_RSEQC_MODULES; do + if ! [[ ";${VIASH_PAR_RSEQC_MODULES_CHOICES[*]};" =~ ";${val};" ]]; then + ViashError '--rseqc_modules' specified value of \'${val}\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_SEQUENCING_PROTOCOL" ]; then + VIASH_PAR_SEQUENCING_PROTOCOL_CHOICES=("non-strand-specific;strand-specific-reverse;strand-specific-forward") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_SEQUENCING_PROTOCOL_CHOICES[*]};" =~ ";$VIASH_PAR_SEQUENCING_PROTOCOL;" ]]; then + ViashError '--sequencing_protocol' specified value of \'$VIASH_PAR_SEQUENCING_PROTOCOL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_PRESEQ_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_PRESEQ_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PRESEQ_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_BAMSTAT_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_BAMSTAT_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAMSTAT_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_STRANDEDNESS_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRANDEDNESS_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_STATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_STATS")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_DIST" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_DIST")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_DIST")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT")" +fi +if [ ! -z "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE")" +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_TIN_OUTPUT_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TIN_OUTPUT_SUMMARY")" +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && [ ! -d "$(dirname "$VIASH_PAR_TIN_OUTPUT_METRICS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TIN_OUTPUT_METRICS")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && [ ! -d "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO")" +fi +if [ ! -z "$VIASH_PAR_MULTIQC_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_MULTIQC_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MULTIQC_REPORT")" +fi +if [ ! -z "$VIASH_PAR_MULTIQC_DATA" ] && [ ! -d "$(dirname "$VIASH_PAR_MULTIQC_DATA")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MULTIQC_DATA")" +fi +if [ ! -z "$VIASH_PAR_MULTIQC_PLOTS" ] && [ ! -d "$(dirname "$VIASH_PAR_MULTIQC_PLOTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MULTIQC_PLOTS")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_SUMMARY")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_TPM_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_TPM_GENE")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_RSEQC_RSEQC_JUNCTIONANNOTATION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_junctionannotation/main.nf" +VIASH_DEP_RSEQC_RSEQC_JUNCTIONSATURATION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_junctionsaturation/main.nf" +VIASH_DEP_RSEQC_RSEQC_READDISTRIBUTION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_readdistribution/main.nf" +VIASH_DEP_RSEQC_RSEQC_READDUPLICATION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_readduplication/main.nf" +VIASH_DEP_RSEQC_RSEQC_TIN="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_tin/main.nf" +VIASH_DEP_DUPRADAR="$VIASH_META_RESOURCES_DIR/../../../nextflow/dupradar/main.nf" +VIASH_DEP_PRESEQ_LCEXTRAP="$VIASH_META_RESOURCES_DIR/../../../nextflow/preseq_lcextrap/main.nf" +VIASH_DEP_MULTIQC_CUSTOM_BIOTYPE="$VIASH_META_RESOURCES_DIR/../../../nextflow/multiqc_custom_biotype/main.nf" +VIASH_DEP_DESEQ2_QC="$VIASH_META_RESOURCES_DIR/../../../nextflow/deseq2_qc/main.nf" +VIASH_DEP_PREPARE_MULTIQC_INPUT="$VIASH_META_RESOURCES_DIR/../../../nextflow/prepare_multiqc_input/main.nf" +VIASH_DEP_RSEM_MERGE_COUNTS="$VIASH_META_RESOURCES_DIR/../../../nextflow/rsem_merge_counts/main.nf" +VIASH_DEP_WORKFLOWS_MERGE_QUANT_RESULTS="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/merge_quant_results/main.nf" +VIASH_DEP_RSEQC_RSEQC_BAMSTAT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/main.nf" +VIASH_DEP_RSEQC_RSEQC_INFEREXPERIMENT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/main.nf" +VIASH_DEP_RSEQC_RSEQC_INNER_DISTANCE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/main.nf" +VIASH_DEP_QUALIMAP_QUALIMAP_RNASEQ="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/main.nf" +VIASH_DEP_FEATURECOUNTS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/main.nf" +VIASH_DEP_MULTIQC="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-quality_control-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +workflow run_wf { + + take: + input_ch + + main: + + qc_ch = input_ch + + // temporary fix to force assignment when alignment is skipped + | map {it} + + // Feature biotype QC using featureCounts + | map { id, state -> + def biotype_in_gtf = biotypeInGtf(state.gtf, state.biotype) + def attribute_type = state.gencode ? "gene_type" : state.featurecounts_group_type + def strand = (state.strandedness == "forward") ? 1 : ((state.strandedness == "reverse") ? 2 : 0) + [ id, state + [biotype_in_gtf: biotype_in_gtf, attribute_type: attribute_type, strand: strand] ] + } + + | featurecounts.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.biotype_in_gtf && !state.skip_align }, + fromState: [ + "paired": "paired", + "strand": "strand", + "annotation": "gtf", + "input": "genome_bam", + "attribute_type": "attribute_type", + "feature_type": "featurecounts_feature_type", + "count_read_pairs": "paired" + ], + toState: [ + "featurecounts": "counts", + "featurecounts_summary": "summary" + ], + args: [ + both_aligned: true, + same_strand: true + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | multiqc_custom_biotype.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, + fromState: [ + "id": "id", + "biocounts": "featurecounts", + "biotypes_header": "biotypes_header" + ], + toState: [ + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | preseq_lcextrap.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, + fromState: [ + "paired": "paired", + "input": "genome_bam", + "extra_preseq_args": "extra_preseq_args" + ], + toState: [ "preseq_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | rseqc_bamstat.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "mapq": "map_qual" + ], + toState: [ "bamstat_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_inferexperiment.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual" + ], + toState: [ "strandedness_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + // Get predicted strandedness from the RSeQC infer_experiment.py output + | map { id, state -> + def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) + def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true + [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] + } + | rseqc_inner_distance.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, + key: "inner_distance", + args: ["output_prefix": "output"], + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual", + "lower_bound": "lower_bound_size", + "upper_bound": "upper_bound_size", + "step": "step_size", + ], + toState: [ + "inner_dist_output_stats": "output_stats", + "inner_dist_output_dist": "output_dist", + "inner_dist_output_freq": "output_freq", + "inner_dist_output_plot": "output_plot", + "inner_dist_output_plot_r": "output_plot_r" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_junctionannotation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "map_qual": "map_qual", + "min_intron": "min_intron" + ], + toState: [ + "junction_annotation_output_log": "output_log", + "junction_annotation_output_plot_r": "output_plot_r", + "junction_annotation_output_junction_bed": "output_junction_bed", + "junction_annotation_output_junction_interact": "output_junction_interact", + "junction_annotation_output_junction_sheet": "output_junction_sheet", + "junction_annotation_output_splice_events_plot": "output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_junctionsaturation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", + "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", + "sampling_percentile_step": "sampling_percentile_step", + "min_intron": "min_intron", + "min_splice_read": "min_splice_read", + "map_qual": "map_qual" + ], + toState: [ + "junction_saturation_output_plot_r": "output_plot_r", + "junction_saturation_output_plot": "output_plot" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_readdistribution.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + ], + toState: [ "read_distribution_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_readduplication.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "read_count_upper_limit": "read_count_upper_limit", + "map_qual": "map_qual" + ], + toState: [ + "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_tin.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "bam_input": "genome_bam", + "bai_input": "genome_bam_index", + "refgene": "gene_bed", + "minimum_coverage": "minimum_coverage", + "sample_size": "tin_sample_size", + "subtract_background": "subtract_background" + ], + toState: [ + "tin_output_summary": "output_tin_summary", + "tin_output_metrics": "output_tin" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | dupradar.run( + runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align }, + fromState: [ + "id": "id", + "input": "genome_bam", + "gtf_annotation": "gtf", + "paired": "paired", + "strandedness": "strandedness" + ], + toState: [ + "dupradar_output_dupmatrix": "output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "output_expression_histogram", + "dupradar_output_intercept_slope": "output_intercept_slope" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // TODO: Add outdir as an output argument to the qualimap module on biobox. + // Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost. + | qualimap_rnaseq.run( + runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align }, + fromState: [ + "bam": "genome_bam", + "gtf": "gtf", + "num_pr_bases": "pr_bases", + "num_tr_bias": "tr_bias", + "algorithm": "algorithm", + "sequencing_protocol": "sequencing_protocol", + "sorted": "sorted", + "java_memory_size": "java_memory_size", + ], + toState: [ + "qualimap_report": "report", + "qualimap_qc_report": "qc_report", + "qualimap_counts": "counts" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + merged_ch = qc_ch + | toSortedList + | map { list -> + def ids = list.collect { id, state -> id } + def strandedness = list.collect { id, state -> state.strandedness } + def num_trimmed_reads = list.collect { id, state -> state.num_trimmed_reads } + def passed_trimmed_reads = list.collect { id, state -> state.passed_trimmed_reads } + def passed_mapping = list.collect { id, state -> state.passed_mapping } + def percent_mapped = list.collect { id, state -> state.percent_mapped } + def inferred_strand = list.collect { id, state -> state.inferred_strand } + def passed_strand_check = list.collect { id, state -> state.passed_strand_check } + def gtf = list.collect { id, state -> state.gtf }.unique()[0] + def gtf_extra_attributes = list.collect { id, state -> state.gtf_extra_attributes }.unique()[0] + def gtf_group_features = list.collect { id, state -> state.gtf_group_features }.unique()[0] + def pca_header_multiqc = list.collect { id, state -> state.pca_header_multiqc }.unique()[0] + def clustering_header_multiqc = list.collect { id, state -> state.clustering_header_multiqc }.unique()[0] + def aligner = list.collect { id, state -> state.aligner }.unique()[0] + def pseudo_aligner = list.collect { id, state -> state.pseudo_aligner }.unique()[0] + def deseq2_vst = list.collect { id, state -> state.deseq2_vst }.unique()[0] + def extra_deseq2_args = list.collect { id, state -> state.extra_deseq2_args }.unique()[0] + def extra_deseq2_args2 = list.collect { id, state -> state.extra_deseq2_args2 }.unique()[0] + def skip_deseq2_qc = list.collect { id, state -> state.skip_deseq2_qc }.unique()[0] + def skip_qc = list.collect { id, state -> state.skip_qc }.unique()[0] + def skip_align = list.collect { id, state -> state.skip_align }.unique()[0] + def skip_pseudo_align = list.collect { id, state -> state.skip_pseudo_align }.unique()[0] + def quant_results = list.collect { id, state -> + (state.quant_results_file instanceof java.nio.file.Path && state.quant_results_file.exists()) ? + state.quant_results_file : + null } + def rsem_counts_gene = list.collect { id, state -> + (state.rsem_counts_gene instanceof java.nio.file.Path && state.rsem_counts_gene.exists()) ? + state.rsem_counts_gene : + null } + def rsem_counts_transcripts = list.collect { id, state -> + (state.rsem_counts_transcripts instanceof java.nio.file.Path && state.rsem_counts_transcripts.exists()) ? + state.rsem_counts_transcripts : + null } + def pseudo_quant_out_dir = list.collect { id, state -> + (state.pseudo_quant_out_dir instanceof java.nio.file.Path && state.pseudo_quant_out_dir.exists()) ? + state.pseudo_quant_out_dir : + null } + def pseudo_salmon_quant_results = list.collect { id, state -> + (state.pseudo_salmon_quant_results_file instanceof java.nio.file.Path && state.pseudo_salmon_quant_results_file.exists()) ? + state.pseudo_salmon_quant_results_file : + null } + def pseudo_kallisto_quant_results = list.collect { id, state -> + (state.pseudo_kallisto_quant_results_file instanceof java.nio.file.Path && state.pseudo_kallisto_quant_results_file.exists()) ? + state.pseudo_kallisto_quant_results_file : + null } + def fastqc_zip_1 = list.collect { id, state -> + (state.fastqc_zip_1 instanceof java.nio.file.Path && state.fastqc_zip_1.exists()) ? + state.fastqc_zip_1 : + null } + def fastqc_zip_2 = list.collect { id, state -> + (state.fastqc_zip_2 instanceof java.nio.file.Path && state.fastqc_zip_2.exists()) ? + state.fastqc_zip_2 : + null } + def trim_zip_1 = list.collect { id, state -> + (state.trim_zip_1 instanceof java.nio.file.Path && state.trim_zip_1.exists()) ? + state.trim_zip_1 : + null } + def trim_zip_2 = list.collect { id, state -> + (state.trim_zip_2 instanceof java.nio.file.Path && state.trim_zip_2.exists()) ? + state.trim_zip_2 : + null } + def trim_log_1 = list.collect { id, state -> + (state.trim_log_1 instanceof java.nio.file.Path && state.trim_log_1.exists()) ? + state.trim_log_1 : + null } + def trim_log_2 = list.collect { id, state -> + (state.trim_log_2 instanceof java.nio.file.Path && state.trim_log_2.exists()) ? + state.trim_log_2 : + null } + def sortmerna_multiqc = list.collect { id, state -> + (state.sortmerna_multiqc instanceof java.nio.file.Path && state.sortmerna_multiqc.exists()) ? + state.sortmerna_multiqc : + null } + def star_multiqc = list.collect { id, state -> + (state.star_multiqc instanceof java.nio.file.Path && state.star_multiqc.exists()) ? + state.star_multiqc : + null } + def genome_bam_stats = list.collect { id, state -> + (state.genome_bam_stats instanceof java.nio.file.Path && state.genome_bam_stats.exists()) ? + state.genome_bam_stats : + null } + def genome_bam_flagstat = list.collect { id, state -> + (state.genome_bam_flagstat instanceof java.nio.file.Path && state.genome_bam_flagstat.exists()) ? + state.genome_bam_flagstat : + null } + def genome_bam_idxstats = list.collect { id, state -> + (state.genome_bam_idxstats instanceof java.nio.file.Path && state.genome_bam_idxstats.exists()) ? + state.genome_bam_idxstats : + null } + def markduplicates_multiqc = list.collect { id, state -> + (state.markduplicates_multiqc instanceof java.nio.file.Path && state.markduplicates_multiqc.exists()) ? + state.markduplicates_multiqc : + null } + def salmon_multiqc = list.collect { id, state -> + (state.salmon_multiqc instanceof java.nio.file.Path && state.salmon_multiqc.exists()) ? + state.salmon_multiqc : + null } + def rsem_multiqc = list.collect { id, state -> + (state.rsem_multiqc instanceof java.nio.file.Path && state.rsem_multiqc.exists()) ? + state.rsem_multiqc : + null } + def pseudo_multiqc = list.collect { id, state -> + (state.pseudo_multiqc instanceof java.nio.file.Path && state.pseudo_multiqc.exists()) ? + state.pseudo_multiqc : + null } + def featurecounts_multiqc = list.collect { id, state -> + (state.featurecounts_multiqc instanceof java.nio.file.Path && state.featurecounts_multiqc.exists()) ? + state.featurecounts_multiqc : + null } + def featurecounts_rrna_multiqc = list.collect { id, state -> + (state.featurecounts_rrna_multiqc instanceof java.nio.file.Path && state.featurecounts_rrna_multiqc.exists()) ? + state.featurecounts_rrna_multiqc : + null } + def preseq_output = list.collect { id, state -> + (state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ? + state.preseq_output : + null } + // def qualimap_output_dir = list.collect { id, state -> + // (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? + // state.qualimap_output_dir : + // null } + def dupradar_output_dup_intercept_mqc = list.collect { id, state -> + (state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ? + state.dupradar_output_dup_intercept_mqc : + null } + def dupradar_output_duprate_exp_denscurve_mqc = list.collect { id, state -> + (state.dupradar_output_duprate_exp_denscurve_mqc instanceof java.nio.file.Path && state.dupradar_output_duprate_exp_denscurve_mqc.exists()) ? + state.dupradar_output_duprate_exp_denscurve_mqc : + null } + def bamstat_output = list.collect { id, state -> + (state.bamstat_output instanceof java.nio.file.Path && state.bamstat_output.exists()) ? + state.bamstat_output : + null } + def inferexperiment_multiqc = list.collect { id, state -> + (state.strandedness_output instanceof java.nio.file.Path && state.strandedness_output.exists()) ? + state.strandedness_output : + null } + def inner_dist_output_freq = list.collect { id, state -> + (state.inner_dist_output_freq instanceof java.nio.file.Path && state.inner_dist_output_freq.exists()) ? + state.inner_dist_output_freq : + null } + def junction_annotation_output_log = list.collect { id, state -> + (state.junction_annotation_output_log instanceof java.nio.file.Path && state.junction_annotation_output_log.exists()) ? + state.junction_annotation_output_log : + null } + def junction_saturation_output_plot_r = list.collect { id, state -> + (state.junction_saturation_output_plot_r instanceof java.nio.file.Path && state.junction_saturation_output_plot_r.exists()) ? + state.junction_saturation_output_plot_r : + null } + def read_distribution_output = list.collect { id, state -> + (state.read_distribution_output instanceof java.nio.file.Path && state.read_distribution_output.exists()) ? + state.read_distribution_output : + null } + def read_duplication_output_duplication_rate_mapping = list.collect { id, state -> + (state.read_duplication_output_duplication_rate_mapping instanceof java.nio.file.Path && state.read_duplication_output_duplication_rate_mapping.exists()) ? + state.read_duplication_output_duplication_rate_mapping : + null } + def tin_output_summary = list.collect { id, state -> + (state.tin_output_summary instanceof java.nio.file.Path && state.tin_output_summary.exists()) ? + state.tin_output_summary : + null } + def multiqc_custom_config = list.collect { id, state -> state.multiqc_custom_config }.unique()[0] + + ["merged", + [ + ids: ids, + strandedness: strandedness, + num_trimmed_reads: num_trimmed_reads, + passed_trimmed_reads: passed_trimmed_reads, + passed_mapping: passed_mapping, + percent_mapped: percent_mapped, + inferred_strand: inferred_strand, + passed_strand_check: passed_strand_check, + skip_align: skip_align, + skip_pseudo_align: skip_pseudo_align, + quant_results: quant_results.findAll { it != null }, + rsem_counts_gene: rsem_counts_gene.findAll { it != null }, + rsem_counts_transcripts: rsem_counts_transcripts.findAll { it != null }, + pseudo_quant_out_dir: pseudo_quant_out_dir.findAll { it != null }, + pseudo_salmon_quant_results: pseudo_salmon_quant_results.findAll { it != null }, + pseudo_kallisto_quant_results: pseudo_kallisto_quant_results.findAll { it != null }, + gtf: gtf, + gtf_extra_attributes: gtf_extra_attributes, + gtf_group_features: gtf_group_features, + pca_header_multiqc: pca_header_multiqc, + clustering_header_multiqc: clustering_header_multiqc, + aligner: aligner, + pseudo_aligner: pseudo_aligner, + deseq2_vst: deseq2_vst, + extra_deseq2_args: extra_deseq2_args, + extra_deseq2_args2: extra_deseq2_args2, + skip_deseq2_qc: skip_deseq2_qc, + fastqc_zip: fastqc_zip_1 + fastqc_zip_2, + trim_zip: trim_zip_1 + trim_zip_2, + trim_log: trim_log_1 + trim_log_2, + sortmerna_multiqc: sortmerna_multiqc, + star_multiqc: star_multiqc, + genome_bam_stats: genome_bam_stats, + genome_bam_flagstat: genome_bam_flagstat, + genome_bam_idxstats: genome_bam_idxstats, + markduplicates_multiqc: markduplicates_multiqc, + salmon_multiqc: salmon_multiqc, + rsem_multiqc: rsem_multiqc, + pseudo_multiqc: pseudo_multiqc, + featurecounts_multiqc: featurecounts_multiqc, + featurecounts_rrna_multiqc: featurecounts_rrna_multiqc, + preseq_output: preseq_output, + // qualimap_output_dir: qualimap_output_dir, + dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc, + dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc, + bamstat_output: bamstat_output, + inner_dist_output_freq: inner_dist_output_freq, + inferexperiment_multiqc: inferexperiment_multiqc, + junction_annotation_output_log: junction_annotation_output_log, + junction_saturation_output_plot_r: junction_saturation_output_plot_r, + read_distribution_output: read_distribution_output, + read_duplication_output_duplication_rate_mapping: read_duplication_output_duplication_rate_mapping, + tin_output_summary: tin_output_summary, + multiqc_custom_config: multiqc_custom_config + ] + ] + } + + // Merge quantification results of alignment + | merge_quant_results.run ( + runIf: { id, state -> !state.skip_align && state.aligner == 'star_salmon' }, + fromState: [ + "salmon_quant_results": "quant_results", + "gtf": "gtf", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf_group_features": "gtf_group_features" + ], + args: [ quant_type: "salmon"], + toState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "lengths_gene": "lengths_gene", + "lengths_transcript": "lengths_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" + ], + key: "merge_quant_results", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | rsem_merge_counts.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts" + ], + toState: [ + "tpm_gene": "merged_gene_tpm", + "counts_gene": "merged_gene_counts", + "tpm_transcript": "merged_transcript_tpm", + "counts_transcript": "merged_transcript_counts" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | deseq2_qc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_align }, + fromState: { id, state -> + def counts = (state.aligner == "star_rsem") ? state.counts_gene : state.counts_gene_length_scaled + [ + counts: counts, + vst: state.deseq2_vst, + label: state.aligner + ] + }, + args: [count_col: 3, id_col: 1, outprefix: "deseq2"], + toState: [ + "deseq2_output": "outdir", + "deseq2_pca_multiqc": "pca_multiqc", + "deseq2_dists_multiqc": "dists_multiqc" + ], + key: "deseq2_qc_align_quant", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Merge quantification results of pseudo alignment + | merge_quant_results.run ( + runIf: { id, state -> !state.skip_pseudo_align }, + fromState: [ + "salmon_quant_results": "pseudo_salmon_quant_results", + "kallisto_quant_results": "pseudo_kallisto_quant_results", + "gtf": "gtf", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf_group_features": "gtf_group_features", + "quant_type": "pseudo_aligner" + ], + toState: [ + "pseudo_tpm_gene": "tpm_gene", + "pseudo_counts_gene": "counts_gene", + "pseudo_counts_gene_length_scaled": "counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "counts_gene_scaled", + "pseudo_tpm_transcript": "tpm_transcript", + "pseudo_counts_transcript": "counts_transcript", + "pseudo_lengths_gene": "lengths_gene", + "pseudo_lengths_transcript": "lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" + ], + key: "merge_pseudo_quant_results", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | deseq2_qc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_pseudo_align }, + fromState: [ + "counts": "pseudo_counts_gene_length_scaled", + "vst": "deseq2_vst", + "label": "pseudo_aligner" + ], + args: [count_col: 3, id_col: 1, outprefix: "deseq2"], + toState: [ + "deseq2_output_pseudo": "outdir", + "deseq2_pca_multiqc_pseudo": "pca_multiqc", + "deseq2_dists_multiqc_pseudo": "dists_multiqc" + ], + key: "deseq2_qc_pseuso_align_quant", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Get list of samples that failed trimming, mapping, and strand check for MultiQC report + | map { id, state -> + def fail_trimming_header = ["Sample", "Reads after trimming"] + def fail_trimming_multiqc = "" + def star_mapping_header = ["Sample", "STAR uniquely mapped reads (%)"] + def fail_mapping_multiqc = "" + def strand_check_header = ["Sample", "Provided strandedness", "Inferred strandedness", "Sense (%)", "Antisense (%)", "Undetermined (%)"] + def fail_strand_multiqc = "" + if (state.ids.size() > 0) { + fail_trimming_multiqc += "\${fail_trimming_header.join('\\t')}\\n" + fail_mapping_multiqc += "\${star_mapping_header.join('\\t')}\\n" + fail_strand_multiqc += "\${strand_check_header.join('\\t')}\\n" + for (i=0; i + state.each { key, value -> + if (value instanceof ArrayList) { + value.removeAll { it == null } + } + } + mod_state = state.findAll { key, value -> value != null } + [ id, mod_state ] + } + + | prepare_multiqc_input.run( + runIf: { id, state -> !state.skip_qc && !state.skip_multiqc }, + fromState: [ + "fail_trimming_multiqc": "fail_trimming_multiqc", + "fail_mapping_multiqc": "fail_mapping_multiqc", + "fail_strand_multiqc": "fail_strand_multiqc", + "fastqc_raw_multiqc": "fastqc_zip", + "fastqc_trim_multiqc": "trim_zip", + "trim_log_multiqc": "trim_log", + "sortmerna_multiqc": "sortmerna_multiqc", + "star_multiqc": "star_multiqc", + "salmon_multiqc": "salmon_multiqc", + "rsem_multiqc": "rsem_multiqc", + "pseudo_multiqc": "pseudo_multiqc", + "samtools_stats": "genome_bam_stats", + "samtools_flagstat": "genome_bam_flagstat", + "samtools_idxstats": "genome_bam_idxstats", + "markduplicates_multiqc": "markduplicates_multiqc", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "aligner_pca_multiqc": "deseq2_pca_multiqc", + "aligner_clustering_multiqc": "deseq2_dists_multiqc", + "pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo", + "pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo", + "preseq_multiqc": "preseq_output", + // "qualimap_multiqc": "qualimap_output_dir", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "bamstat_multiqc": "bamstat_output", + "inferexperiment_multiqc": "inferexperiment_multiqc", + "innerdistance_multiqc": "inner_dist_output_freq", + "junctionannotation_multiqc": "junction_annotation_output_log", + "junctionsaturation_multiqc": "junction_saturation_output_plot_r", + "readdistribution_multiqc": "read_distribution_output", + "readduplication_multiqc": "read_duplication_output_duplication_rate_mapping", + "tin_multiqc": "tin_output_summary", + "multiqc_config": "multiqc_custom_config" + ], + toState: [ "multiqc_input": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | multiqc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_multiqc }, + fromState: [ + "title": "multiqc_title", + "input": "multiqc_input", + ], + args: [exclude_modules: "general_stats"], + toState: [ + "multiqc_report": "output_report", + "multiqc_data": "output_data", + "multiqc_plots": "output_plots" + ], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + | map { id, state -> + [ + id, [ + tpm_gene: state.tpm_gene, + counts_gene: state.counts_gene, + counts_gene_length_scaled: state.counts_gene_length_scaled, + counts_gene_scaled: state.counts_gene_scaled, + tpm_transcript: state.tpm_transcript, + counts_transcript: state.counts_transcript, + quant_merged_summarizedexperiment: state.quant_merged_summarizedexperiment, + deseq2_output: state.deseq2_output, + pseudo_tpm_gene: state.pseudo_tpm_gene, + pseudo_counts_gene: state.pseudo_counts_gene, + pseudo_counts_gene_length_scaled: state.pseudo_counts_gene_length_scaled, + pseudo_counts_gene_scaled: state.pseudo_counts_gene_scaled, + pseudo_tpm_transcript: state.pseudo_tpm_transcript, + pseudo_counts_transcript: state.pseudo_counts_transcript, + pseudo_quant_merged_summarizedexperiment: state.pseudo_quant_merged_summarizedexperiment, + deseq2_output_pseudo: state.deseq2_output_pseudo, + multiqc_report: state.multiqc_report, + multiqc_data: state.multiqc_data, + multiqc_plots: state.multiqc_plots + ] + ] + } + + | map { list -> list[1]} + + output_ch = qc_ch + + | combine(merged_ch) + + | map { list -> [list[0], list[1] + list[2]] } + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "lengths_gene": "lengths_gene", + "lengths_transcript": "lengths_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + emit: + output_ch +} + +// +// Function to check whether biotype field exists in GTF file +// +def biotypeInGtf(gtf_file, biotype) { + def hits = 0 + gtf_file.eachLine { line -> + def attributes = line.split('\\t')[-1].split() + if (attributes.contains(biotype)) { + hits += 1 + } + } + if (hits) { + return true + } else { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\\n" + + " Biotype attribute '\${biotype}' not found in the last column of the GTF file!\\n\\n" + + " Biotype QC will be skipped to circumvent the issue below:\\n" + + " https://github.com/nf-core/rnaseq/issues/460\\n\\n" + + " Amend '--featurecounts_group_type' to change this behaviour.\\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + return false + } +} + +// +// Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output +// +def getInferexperimentStrandedness(inferexperiment_file, cutoff=30) { + def sense = 0 + def antisense = 0 + def undetermined = 0 + inferexperiment_file.eachLine { line -> + def undetermined_matcher = line =~ /Fraction of reads failed to determine:\\s([\\d\\.]+)/ + def se_sense_matcher = line =~ /Fraction of reads explained by "\\++,--":\\s([\\d\\.]+)/ + def se_antisense_matcher = line =~ /Fraction of reads explained by "\\+-,-\\+":\\s([\\d\\.]+)/ + def pe_sense_matcher = line =~ /Fraction of reads explained by "1\\++,1--,2\\+-,2-\\+":\\s([\\d\\.]+)/ + def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\\+-,1-\\+,2\\+\\+,2--":\\s([\\d\\.]+)/ + if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100 + if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100 + if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100 + if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100 + if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100 + } + def strandedness = 'unstranded' + if (sense >= 100-cutoff) { + strandedness = 'forward' + } else if (antisense >= 100-cutoff) { + strandedness = 'reverse' + } + return [ strandedness, sense, antisense, undetermined ] +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_PRESEQ_OUTPUT" ] && [ ! -e "$VIASH_PAR_PRESEQ_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_PRESEQ_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAMSTAT_OUTPUT" ] && [ ! -e "$VIASH_PAR_BAMSTAT_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_BAMSTAT_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && [ ! -e "$VIASH_PAR_STRANDEDNESS_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_STRANDEDNESS_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && [ ! -e "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && [ ! -e "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && [ ! -e "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_READ_DISTRIBUTION_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && [ ! -e "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ]; then + ViashError "Output file '$VIASH_PAR_TIN_OUTPUT_SUMMARY' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && [ ! -e "$VIASH_PAR_TIN_OUTPUT_METRICS" ]; then + ViashError "Output file '$VIASH_PAR_TIN_OUTPUT_METRICS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_QC_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_QC_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! -e "$VIASH_PAR_QUALIMAP_COUNTS" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_COUNTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! -e "$VIASH_PAR_DESEQ2_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_DESEQ2_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && [ ! -e "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ]; then + ViashError "Output file '$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_REPORT" ] && [ ! -e "$VIASH_PAR_MULTIQC_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_MULTIQC_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_DATA" ] && [ ! -e "$VIASH_PAR_MULTIQC_DATA" ]; then + ViashError "Output file '$VIASH_PAR_MULTIQC_DATA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_PLOTS" ] && [ ! -e "$VIASH_PAR_MULTIQC_PLOTS" ]; then + ViashError "Output file '$VIASH_PAR_MULTIQC_PLOTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -e "$VIASH_PAR_TPM_GENE" ]; then + ViashError "Output file '$VIASH_PAR_TPM_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_TPM_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -e "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ]; then + ViashError "Output file '$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_GENE" ] && [ ! -e "$VIASH_PAR_PSEUDO_TPM_GENE" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_TPM_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -e "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/workflows/rnaseq/.config.vsh.yaml b/target/executable/workflows/rnaseq/.config.vsh.yaml new file mode 100644 index 0000000..cb10b3a --- /dev/null +++ b/target/executable/workflows/rnaseq/.config.vsh.yaml @@ -0,0 +1,2102 @@ +name: "rnaseq" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\ + \ or auto" + info: null + default: + - "auto" + required: false + choices: + - "unstranded" + - "forward" + - "reverse" + - "auto" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Reference genome options" + arguments: + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file. This parameter is *mandatory* if --genome\ + \ is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gff" + description: "Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--additional_fasta" + description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\ + \ sequences." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Path to FASTA transcriptome file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed" + description: "Path to BED file containing gene intervals. This will be created\ + \ from the GTF file if not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--splicesites" + description: "Splice sites file required for HISAT2." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_index" + description: "Path to directory or tar.gz archive for pre-built STAR index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index" + description: "Path to directory or tar.gz archive for pre-built RSEM index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory or tar.gz archive for pre-built Salmon index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index" + description: "Path to directory or tar.gz archive for pre-built Kallisto index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--gencode" + description: "Specify if the GTF annotation is in GENCODE format." + info: null + direction: "input" + - type: "string" + name: "--gtf_extra_attributes" + description: "Additional gene identifiers from the input GTF file when running\ + \ Salmon. More than one value can be specified separated by comma." + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + description: "Define the attribute type used to group features in the GTF file\ + \ when running Salmon." + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_group_type" + description: "The attribute type used to group feature types in the GTF file when\ + \ generating the biotype plot with featureCounts." + info: null + default: + - "gene_biotype" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_feature_type" + description: "By default, the pipeline assigns reads based on the 'exon' attribute\ + \ within the GTF file." + info: null + default: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--star_sjdb_gtf_feature_exon" + description: "Feature type in GTF file to be used as exons for building transcripts" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read trimming options" + arguments: + - type: "string" + name: "--trimmer" + description: "Specify the trimming tool to use." + info: null + default: + - "trimgalore" + required: false + choices: + - "trimgalore" + - "fastp" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_trimmed_reads" + description: "Minimum number of trimmed reads below which samples are removed\ + \ from further processing. Some downstream steps in the pipeline will fail if\ + \ this threshold is too low." + info: null + default: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read filtering options" + arguments: + - type: "file" + name: "--bbsplit_fasta_list" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index" + description: "Path to directory or tar.gz archive for pre-built BBSplit index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--remove_ribo_rna" + description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + info: null + direction: "input" + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + default: + - "src/assets/rrna-db-defaults.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "UMI options" + arguments: + - type: "boolean_true" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + direction: "input" + - type: "string" + name: "--umitools_extract_method" + description: "UMI pattern to use." + info: null + default: + - "string" + required: false + choices: + - "string" + - "regex" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern" + description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\ + \ first 6 nucleotides of the read are from the UMI." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern2" + description: "The UMI barcode pattern to use if the UMI is located in read 2." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--umi_discard_read" + description: "After UMI barcode extraction discard either R1 or R2 by setting\ + \ this parameter to 1 or 2, respectively." + info: null + default: + - 0 + required: false + choices: + - 0 + - 1 + - 2 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_umi_separator" + description: "The character that separates the UMI in the read name. Most likely\ + \ a colon if you skipped the extraction with UMI-tools and used other software." + info: null + default: + - "_" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_grouping_method" + description: "Method to use to determine read groups by subsuming those with similar\ + \ UMIs. All methods start by identifying the reads with the same mapping position,\ + \ but treat similar yet nonidentical UMIs differently." + info: null + default: + - "directional" + required: false + choices: + - "unique" + - "percentile" + - "cluster" + - "adjacency" + - "directional" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--umi_dedup_stats" + description: "Generate output stats when running \"umi_tools dedup\"." + info: null + direction: "input" +- name: "Alignment options" + arguments: + - type: "string" + name: "--aligner" + description: "Specifies the alignment algorithm to use - available options are\ + \ 'star_salmon', 'star_rsem' and 'hisat2'." + info: null + default: + - "star_salmon" + required: false + choices: + - "star_salmon" + - "star_rsem" + - "hisat2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Specifies the pseudo aligner to use - available options are 'salmon'.\ + \ Runs in addition to '--aligner'." + info: null + default: + - "salmon" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--pseudo_aligner_kmer_size" + description: "Kmer length passed to indexing step of pseudoaligners." + info: null + default: + - 31 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length" + description: "For single-end mode only, the estimated average fragment length\ + \ to use for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length_sd" + description: "For single-end mode only, the estimated standard deviation of the\ + \ fragment length for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bam_csi_index" + description: "Create a CSI index for BAM files instead of the traditional BAI\ + \ index. This will be required for genomes with larger chromosome sizes." + info: null + direction: "input" + - type: "string" + name: "--salmon_quant_libtype" + description: "Override Salmon library type inferred based on strandedness defined\ + \ in meta object." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_mapped_reads" + description: "Minimum percentage of uniquely mapped reads below which samples\ + \ are removed from further processing." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--stringtie_ignore_gtf" + description: "Perform reference-guided de novo assembly of transcripts using StringTie,\ + \ i.e. don't restrict to those in GTF file." + info: null + direction: "input" + - type: "string" + name: "--extra_stringtie_args" + description: "Extra arguments to pass to stringtie command in addition to defaults\ + \ defined by the pipeline." + info: null + default: + - "-v" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--save_unaligned" + description: "Where possible, save unaligned reads from either STAR, HISAT2 or\ + \ Salmon to the results directory." + info: null + direction: "input" + - type: "boolean_true" + name: "--save_align_intermeds" + description: "Save the intermediate BAM files from the alignment step." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_alignment" + description: "Skip all of the alignment-based processes within the pipeline." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_pseudo_alignment" + description: "Skip all of the pseudo-alignment-based processes within the pipeline." + info: null + direction: "input" +- name: "Process skipping options" + arguments: + - type: "boolean" + name: "--skip_fastqc" + description: "Skip FatQC step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_trimming" + description: "Skip the adapter trimming step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_umi_extract" + description: "Skip umi_tools extract step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_qc" + description: "Skip all QC steps except for MultiQC." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_markduplicates" + description: "Skip picard MarkDuplicates step." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_stringtie" + description: "Skip StringTie." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_biotype_qc" + description: "Skip additional featureCounts process for biotype QC." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_bigwig" + description: "Skip bigWig file creation." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_preseq" + description: "Skip Preseq." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_dupradar" + description: "Skip dupRadar." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_qualimap" + description: "Skip Qualimap." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_rseqc" + description: "Skip RSeQC." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_multiqc" + description: "Skip MultiQC." + info: null + direction: "input" +- name: "Other process arguments" + arguments: + - type: "string" + name: "--extra_picard_args" + description: "Extra arguments to pass to picard MarkDuplicates command in addition\ + \ to defaults defined by the pipeline." + info: null + default: + - " --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT\ + \ --TMP_DIR tmp" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_preseq_args" + description: "Extra arguments to pass to preseq lc_extrap command in addition\ + \ to defaults defined by the pipeline" + info: null + default: + - "-verbose -seed 1 -seg_len 100000000" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--deseq2_vst" + description: "Use vst transformation instead of rlog with DESeq2" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--rseqc_modules" + description: "Specify the RSeQC modules to run_wf (comma-separated list)" + info: null + default: + - "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + required: false + choices: + - "bam_stat" + - "inner_distance" + - "infer_experiment" + - "junction_annotation" + - "junction_saturation" + - "read_distribution" + - "read_duplication" + - "tin" + direction: "input" + multiple: true + multiple_sep: ";" +- name: "MultiQC paramenters" + arguments: + - type: "file" + name: "--multiqc_custom_config" + description: "Custom multiqc configuration file\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--multiqc_title" + description: "Custom multiqc title\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_methods_description" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_fasta" + info: null + default: + - "reference/genome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_gtf" + info: null + default: + - "reference/gene_annotation.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_transcript_fasta" + info: null + default: + - "reference/transcriptome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_gene_bed" + info: null + default: + - "reference/gene_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_star_index" + description: "Path to STAR index." + info: null + default: + - "reference/index/STAR" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_salmon_index" + description: "Path to Salmon index." + info: null + default: + - "reference/index/Salmon" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_bbsplit_index" + description: "Path to BBSplit index." + info: null + default: + - "reference/index/BBSplit" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_kallisto_index" + description: "Path to Kallisto index." + info: null + default: + - "reference/index/Kallisto" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_fastq_1" + description: "Path to output directory" + info: null + default: + - "fastq/${id}_r1.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_fastq_2" + description: "Path to output directory" + info: null + default: + - "fastq/${id}_r2.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_1" + description: "FastQC HTML report for read 1." + info: null + default: + - "fastqc_raw/${id}_r1.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_2" + description: "FastQC HTML report for read 2." + info: null + default: + - "fastqc_raw/${id}_r2.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_1" + description: "FastQC report archive for read 1." + info: null + default: + - "fastqc_raw/${id}_r1.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_2" + description: "FastQC report archive for read 2." + info: null + default: + - "fastqc_raw/${id}_r2.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_1" + info: null + default: + - "fastqc_trim/${id}_r1.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_2" + info: null + default: + - "fastqc_trim/${id}_r2.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_1" + info: null + default: + - "fastqc_trim/${id}_r1.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_2" + info: null + default: + - "fastqc_trim/${id}_r2.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_1" + info: null + default: + - "trimgalore/${id}_r1.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_2" + info: null + default: + - "trimgalore/${id}_r2.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastp_trim_json" + description: "The fastp json format report file name" + info: null + default: + - "fastp/$id_out.json" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastp_trim_html" + description: "The fastp html format report file name" + info: null + default: + - "fastp/$id_out.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sortmerna_log" + description: "Sortmerna log file." + info: null + default: + - "sortmerna/$id.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_alignment" + info: null + default: + - "STAR/$id" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_sorted" + info: null + default: + - "STAR/genome_processed/$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + info: null + default: + - "STAR/genome_processed/$id.genome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam" + info: null + default: + - "STAR/transcriptome_processed/$id.transcriptome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_index" + info: null + default: + - "STAR/transcriptome_processed/$id.transcriptome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_log" + info: null + default: + - "STAR/log/$id.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + default: + - "samtools_stats/$id.genome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + default: + - "samtools_stats/$id.genome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + default: + - "samtools_stats/$id.genome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_stats" + info: null + default: + - "samtools_stats/$id.transcriptome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_flagstat" + info: null + default: + - "samtools_stats/$id.transcriptome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_idxstats" + info: null + default: + - "samtools_stats/$id.transcriptome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_results" + info: null + default: + - "STAR_Salmon/$id" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_results_file" + info: null + default: + - "STAR_Salmon/$id/quant.sf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_results" + info: null + default: + - "Pseudo_align_quant/$id" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_gene" + description: "Expression counts on gene level" + info: null + default: + - "RSEM/$id.genes.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_transcripts" + description: "Expression counts on transcript level" + info: null + default: + - "RSEM/$id.isoforms.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_star_rsem" + description: "BAM file generated by STAR (from RSEM)" + info: null + default: + - "RSEM/$id.STAR.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_genome_rsem" + description: "Genome BAM file (from RSEM)" + info: null + default: + - "RSEM/$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_transcript_rsem" + description: "Transcript BAM file (from RSEM)" + info: null + default: + - "RSEM/$id.transcript.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_gene" + info: null + default: + - "transcript_quantification/gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + default: + - "transcript_quantification/gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + default: + - "transcript_quantification/gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + default: + - "transcript_quantification/gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + default: + - "transcript_quantification/transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + default: + - "transcript_quantification/transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_merged_summarizedexperiment" + info: null + default: + - "transcript_quantification/summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--markduplicates_metrics" + info: null + default: + - "picard/$id.MarkDuplicates.metrics.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_transcript_gtf" + info: null + default: + - "stringtie/$id.transcripts.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_coverage_gtf" + info: null + default: + - "stringtie/$id.coverage.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_abundance" + info: null + default: + - "stringtie/$id.gene_abundance.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_ballgown" + info: null + default: + - "stringtie/$id.ballgown" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts" + info: null + default: + - "featurecounts/$id.featureCounts.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_summary" + info: null + default: + - "featurecounts/$id.featureCounts.txt.summary" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_multiqc" + info: null + default: + - "featurecounts/$id.featureCounts_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + default: + - "featurecounts/$id.featureCounts_rrna_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_forward" + info: null + default: + - "bedgraph/$id.forward.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_reverse" + info: null + default: + - "bedgraph/$id.reverse.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_forward" + info: null + default: + - "bigwig/$id.forward.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_reverse" + info: null + default: + - "bigwig/$id.reverse.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--preseq_output" + info: null + default: + - "preseq/$id.lc_extrap.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bamstat_output" + description: "Path to output file (txt) of mapping quality statistics" + info: null + default: + - "RSeQC/bamstat/$id.mapping_quality.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--strandedness_output" + description: "Path to output report (txt) of inferred strandedness" + info: null + default: + - "RSeQC/inferexperiment/$id.strandedness.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_stats" + description: "output file (txt) with summary statistics of inner distances of\ + \ paired reads" + info: null + default: + - "RSeQC/innerdistance/$id.inner_distance.stats" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_dist" + description: "output file (txt) with inner distances of all paired reads" + info: null + default: + - "RSeQC/innerdistance/txt/$id.inner_distance.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_freq" + description: "output file (txt) with frequencies of inner distances of all paired\ + \ reads" + info: null + default: + - "RSeQC/innerdistance/txt/$id.inner_distance_freq.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot" + description: "output file (pdf) with histogram plot of of inner distances of all\ + \ paired reads" + info: null + default: + - "RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot_r" + description: "output file (R) with script of histogram plot of of inner distances\ + \ of all paired reads" + info: null + default: + - "RSeQC/innerdistance/rscript/$id.inner_distance_plot.r" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_log" + description: "output log of junction annotation script" + info: null + default: + - "RSeQC/junctionannotation/log/$id.junction_annotation.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_plot_r" + description: "R script to generate splice_junction and splice_events plot" + info: null + default: + - "RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_bed" + description: "junction annotation file (bed format)" + info: null + default: + - "RSeQC/junctionannotation/bed/$id.junction_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_interact" + description: "interact file (bed format) of junctions. Can be uploaded to UCSC\ + \ genome browser or converted to bigInteract (using bedToBigBed program) for\ + \ visualization." + info: null + default: + - "RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_sheet" + description: "junction annotation file (xls format)" + info: null + default: + - "RSeQC/junctionannotation/xls/$id.junction_annotation.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_events_plot" + description: "plot of splice events (pdf)" + info: null + default: + - "RSeQC/junctionannotation/pdf/$id.splice_events.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_junctions_plot" + description: "plot of junctions (pdf)" + info: null + default: + - "RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot_r" + description: "r script to generate junction_saturation_plot plot" + info: null + default: + - "RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot" + description: "plot of junction saturation (pdf" + info: null + default: + - "RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_distribution_output" + description: "output file (txt) of read distribution analysis." + info: null + default: + - "RSeQC/readdistribution/$id.read_distribution.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot_r" + description: "R script for generating duplication rate plot" + info: null + default: + - "RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot" + description: "duplication rate plot (pdf)" + info: null + default: + - "RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_mapping" + description: "Summary of mapping-based read duplication" + info: null + default: + - "RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_sequence" + description: "Summary of sequencing-based read duplication" + info: null + default: + - "RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_summary" + description: "summary statistics (txt) of calculated TIN metrics" + info: null + default: + - "RSeQC/tin/txt/$id.tin_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_metrics" + description: "file with TIN metrics (xls)" + info: null + default: + - "RSeQC/tin/xls/$id.tin.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dupmatrix" + description: "path to output file (txt) of duplicate tag counts" + info: null + default: + - "dupradar/gene_data/$id.dup_matrix.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dup_intercept_mqc" + description: "path to output file (txt) of multiqc intercept value DupRadar" + info: null + default: + - "dupradar/mqc_intercept/$id.dup_intercept_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_boxplot" + description: "path to output file (pdf) of distribution of expression box plot" + info: null + default: + - "dupradar/box_plot/$id.duprate_exp_boxplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_densplot" + description: "path to output file (pdf) of 2D density scatter plot of duplicate\ + \ tag counts" + info: null + default: + - "dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_denscurve_mqc" + description: "path to output file (pdf) of density curve of gene duplication multiqc" + info: null + default: + - "dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_expression_histogram" + description: "path to output file (pdf) of distribution of RPK values per gene\ + \ histogram" + info: null + default: + - "dupradar/histogram/$id.expression_hist.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_intercept_slope" + info: null + default: + - "dupradar/intercept_slope/$id.intercept_slope.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." + info: null + default: + - "Qualimap/$id.rnaseq_qc_results.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_counts" + description: "Output file for computed counts." + info: null + default: + - "Qualimap/$id.counts.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." + info: null + default: + - "Qualimap/$id.report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output" + info: null + default: + - "deseq2_qc" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output_pseudo" + info: null + default: + - "deseq2_qc_pseudo" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_report" + info: null + default: + - "multiqc/multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_data" + info: null + default: + - "multiqc/multiqc_data" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_plots" + info: null + default: + - "multiqc/multiqc_plots" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_versions" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene" + info: null + default: + - "pseudo_alignment_quantification/gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_length_scaled" + info: null + default: + - "pseudo_alignment_quantification/gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_scaled" + info: null + default: + - "pseudo_alignment_quantification/gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_gene" + info: null + default: + - "pseudo_alignment_quantification/gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_transcript" + info: null + default: + - "pseudo_alignment_quantification/transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_transcript" + info: null + default: + - "pseudo_alignment_quantification/transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_merged_summarizedexperiment" + info: null + default: + - "pseudo_alignment_quantification/quant_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash workflow for the nf-core/rnaseq pipeline.\n" +test_resources: +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_wf" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "workflows/prepare_genome" + repository: + type: "local" +- name: "cat_fastq" + repository: + type: "local" +- name: "workflows/pre_processing" + repository: + type: "local" +- name: "workflows/genome_alignment_and_quant" + repository: + type: "local" +- name: "workflows/pseudo_alignment_and_quant" + repository: + type: "local" +- name: "workflows/post_processing" + repository: + type: "local" +- name: "workflows/quality_control" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/rnaseq/config.vsh.yaml" + runner: "executable" + engine: "native" + output: "target/executable/workflows/rnaseq" + executable: "target/executable/workflows/rnaseq/rnaseq" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/workflows/prepare_genome" + - "target/nextflow/cat_fastq" + - "target/nextflow/workflows/pre_processing" + - "target/nextflow/workflows/genome_alignment_and_quant" + - "target/nextflow/workflows/pseudo_alignment_and_quant" + - "target/nextflow/workflows/post_processing" + - "target/nextflow/workflows/quality_control" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/executable/workflows/rnaseq/nextflow_labels.config b/target/executable/workflows/rnaseq/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/workflows/rnaseq/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/executable/workflows/rnaseq/rnaseq b/target/executable/workflows/rnaseq/rnaseq new file mode 100755 index 0000000..2a1115b --- /dev/null +++ b/target/executable/workflows/rnaseq/rnaseq @@ -0,0 +1,5392 @@ +#!/usr/bin/env bash + +# rnaseq v0.3.0 +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="rnaseq" +VIASH_META_FUNCTIONALITY_NAME="rnaseq" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rnaseq v0.3.0" + echo "" + echo "A viash workflow for the nf-core/rnaseq pipeline." + echo "" + echo "Input:" + echo " --id" + echo " type: string, required parameter" + echo " example: foo" + echo " ID of the sample." + echo "" + echo " --fastq_1" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " Path to the sample (or read 1 of paired end sample)." + echo "" + echo " --fastq_2" + echo " type: file, multiple values allowed" + echo " Path to read 2 of the sample." + echo "" + echo " --strandedness" + echo " type: string" + echo " default: auto" + echo " choices: [ unstranded, forward, reverse, auto ]" + echo " Sample strand-specificity. Must be one of unstranded, forward, reverse" + echo " or auto" + echo "" + echo "Reference genome options:" + echo " --fasta" + echo " type: file, required parameter, file must exist" + echo " Path to FASTA genome file." + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " Path to GTF annotation file. This parameter is *mandatory* if --genome" + echo " is not specified." + echo "" + echo " --gff" + echo " type: file, file must exist" + echo " Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + echo "" + echo " --additional_fasta" + echo " type: file, file must exist" + echo " FASTA file to concatenate to genome FASTA file e.g. containing spike-in" + echo " sequences." + echo "" + echo " --transcript_fasta" + echo " type: file, file must exist" + echo " Path to FASTA transcriptome file." + echo "" + echo " --gene_bed" + echo " type: file, file must exist" + echo " Path to BED file containing gene intervals. This will be created from" + echo " the GTF file if not specified." + echo "" + echo " --splicesites" + echo " type: file, file must exist" + echo " Splice sites file required for HISAT2." + echo "" + echo " --star_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built STAR index." + echo "" + echo " --rsem_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built RSEM index." + echo "" + echo " --salmon_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built Salmon index." + echo "" + echo " --kallisto_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built Kallisto index." + echo "" + echo " --gencode" + echo " type: boolean_true" + echo " Specify if the GTF annotation is in GENCODE format." + echo "" + echo " --gtf_extra_attributes" + echo " type: string" + echo " default: gene_name" + echo " Additional gene identifiers from the input GTF file when running Salmon." + echo " More than one value can be specified separated by comma." + echo "" + echo " --gtf_group_features" + echo " type: string" + echo " default: gene_id" + echo " Define the attribute type used to group features in the GTF file when" + echo " running Salmon." + echo "" + echo " --featurecounts_group_type" + echo " type: string" + echo " default: gene_biotype" + echo " The attribute type used to group feature types in the GTF file when" + echo " generating the biotype plot with featureCounts." + echo "" + echo " --featurecounts_feature_type" + echo " type: string" + echo " default: exon" + echo " By default, the pipeline assigns reads based on the 'exon' attribute" + echo " within the GTF file." + echo "" + echo " --star_sjdb_gtf_feature_exon" + echo " type: string" + echo " Feature type in GTF file to be used as exons for building transcripts" + echo "" + echo "Read trimming options:" + echo " --trimmer" + echo " type: string" + echo " default: trimgalore" + echo " choices: [ trimgalore, fastp ]" + echo " Specify the trimming tool to use." + echo "" + echo " --min_trimmed_reads" + echo " type: integer" + echo " default: 10000" + echo " Minimum number of trimmed reads below which samples are removed from" + echo " further processing. Some downstream steps in the pipeline will fail if" + echo " this threshold is too low." + echo "" + echo "Read filtering options:" + echo " --bbsplit_fasta_list" + echo " type: file, multiple values allowed, file must exist" + echo " List of reference genomes (separated by \";\") to filter reads against" + echo " with BBSplit." + echo "" + echo " --bbsplit_index" + echo " type: file, file must exist" + echo " Path to directory or tar.gz archive for pre-built BBSplit index." + echo "" + echo " --remove_ribo_rna" + echo " type: boolean_true" + echo " Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + echo "" + echo " --ribo_database_manifest" + echo " type: file, file must exist" + echo " default: src/assets/rrna-db-defaults.txt" + echo " Text file containing paths to fasta files (one per line) that will be" + echo " used to create the database for SortMeRNA." + echo "" + echo "UMI options:" + echo " --with_umi" + echo " type: boolean_true" + echo " Enable UMI-based read deduplication." + echo "" + echo " --umitools_extract_method" + echo " type: string" + echo " default: string" + echo " choices: [ string, regex ]" + echo " UMI pattern to use." + echo "" + echo " --umitools_bc_pattern" + echo " type: string" + echo " The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6" + echo " nucleotides of the read are from the UMI." + echo "" + echo " --umitools_bc_pattern2" + echo " type: string" + echo " The UMI barcode pattern to use if the UMI is located in read 2." + echo "" + echo " --umi_discard_read" + echo " type: integer" + echo " default: 0" + echo " choices: [ 0, 1, 2 ]" + echo " After UMI barcode extraction discard either R1 or R2 by setting this" + echo " parameter to 1 or 2, respectively." + echo "" + echo " --umitools_umi_separator" + echo " type: string" + echo " default: _" + echo " The character that separates the UMI in the read name. Most likely a" + echo " colon if you skipped the extraction with UMI-tools and used other" + echo " software." + echo "" + echo " --umitools_grouping_method" + echo " type: string" + echo " default: directional" + echo " choices: [ unique, percentile, cluster, adjacency, directional ]" + echo " Method to use to determine read groups by subsuming those with similar" + echo " UMIs. All methods start by identifying the reads with the same mapping" + echo " position, but treat similar yet nonidentical UMIs differently." + echo "" + echo " --umi_dedup_stats" + echo " type: boolean_true" + echo " Generate output stats when running \"umi_tools dedup\"." + echo "" + echo "Alignment options:" + echo " --aligner" + echo " type: string" + echo " default: star_salmon" + echo " choices: [ star_salmon, star_rsem, hisat2 ]" + echo " Specifies the alignment algorithm to use - available options are" + echo " 'star_salmon', 'star_rsem' and 'hisat2'." + echo "" + echo " --pseudo_aligner" + echo " type: string" + echo " default: salmon" + echo " choices: [ salmon, kallisto ]" + echo " Specifies the pseudo aligner to use - available options are 'salmon'." + echo " Runs in addition to '--aligner'." + echo "" + echo " --pseudo_aligner_kmer_size" + echo " type: integer" + echo " default: 31" + echo " Kmer length passed to indexing step of pseudoaligners." + echo "" + echo " --kallisto_quant_fragment_length" + echo " type: double" + echo " For single-end mode only, the estimated average fragment length to use" + echo " for quantification with Kallisto." + echo "" + echo " --kallisto_quant_fragment_length_sd" + echo " type: double" + echo " For single-end mode only, the estimated standard deviation of the" + echo " fragment length for quantification with Kallisto." + echo "" + echo " --bam_csi_index" + echo " type: boolean_true" + echo " Create a CSI index for BAM files instead of the traditional BAI index." + echo " This will be required for genomes with larger chromosome sizes." + echo "" + echo " --salmon_quant_libtype" + echo " type: string" + echo " Override Salmon library type inferred based on strandedness defined in" + echo " meta object." + echo "" + echo " --min_mapped_reads" + echo " type: integer" + echo " default: 5" + echo " Minimum percentage of uniquely mapped reads below which samples are" + echo " removed from further processing." + echo "" + echo " --stringtie_ignore_gtf" + echo " type: boolean_true" + echo " Perform reference-guided de novo assembly of transcripts using" + echo " StringTie, i.e. don't restrict to those in GTF file." + echo "" + echo " --extra_stringtie_args" + echo " type: string" + echo " default: -v" + echo " Extra arguments to pass to stringtie command in addition to defaults" + echo " defined by the pipeline." + echo "" + echo " --save_unaligned" + echo " type: boolean_true" + echo " Where possible, save unaligned reads from either STAR, HISAT2 or Salmon" + echo " to the results directory." + echo "" + echo " --save_align_intermeds" + echo " type: boolean_true" + echo " Save the intermediate BAM files from the alignment step." + echo "" + echo " --skip_alignment" + echo " type: boolean_true" + echo " Skip all of the alignment-based processes within the pipeline." + echo "" + echo " --skip_pseudo_alignment" + echo " type: boolean_true" + echo " Skip all of the pseudo-alignment-based processes within the pipeline." + echo "" + echo "Process skipping options:" + echo " --skip_fastqc" + echo " type: boolean" + echo " default: false" + echo " Skip FatQC step." + echo "" + echo " --skip_trimming" + echo " type: boolean" + echo " default: false" + echo " Skip the adapter trimming step." + echo "" + echo " --skip_umi_extract" + echo " type: boolean" + echo " default: false" + echo " Skip umi_tools extract step." + echo "" + echo " --skip_qc" + echo " type: boolean_true" + echo " Skip all QC steps except for MultiQC." + echo "" + echo " --skip_markduplicates" + echo " type: boolean_true" + echo " Skip picard MarkDuplicates step." + echo "" + echo " --skip_stringtie" + echo " type: boolean_true" + echo " Skip StringTie." + echo "" + echo " --skip_biotype_qc" + echo " type: boolean_true" + echo " Skip additional featureCounts process for biotype QC." + echo "" + echo " --skip_bigwig" + echo " type: boolean_true" + echo " Skip bigWig file creation." + echo "" + echo " --skip_preseq" + echo " type: boolean_true" + echo " Skip Preseq." + echo "" + echo " --skip_dupradar" + echo " type: boolean_true" + echo " Skip dupRadar." + echo "" + echo " --skip_qualimap" + echo " type: boolean_true" + echo " Skip Qualimap." + echo "" + echo " --skip_rseqc" + echo " type: boolean_true" + echo " Skip RSeQC." + echo "" + echo " --skip_multiqc" + echo " type: boolean_true" + echo " Skip MultiQC." + echo "" + echo "Other process arguments:" + echo " --extra_picard_args" + echo " type: string" + echo " default: --ASSUME_SORTED true --REMOVE_DUPLICATES false" + echo "--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" + echo " Extra arguments to pass to picard MarkDuplicates command in addition to" + echo " defaults defined by the pipeline." + echo "" + echo " --extra_preseq_args" + echo " type: string" + echo " default: -verbose -seed 1 -seg_len 100000000" + echo " Extra arguments to pass to preseq lc_extrap command in addition to" + echo " defaults defined by the pipeline" + echo "" + echo " --deseq2_vst" + echo " type: boolean" + echo " default: true" + echo " Use vst transformation instead of rlog with DESeq2" + echo "" + echo " --rseqc_modules" + echo " type: string, multiple values allowed" + echo " default:" + echo "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + echo " choices: [ bam_stat, inner_distance, infer_experiment," + echo "junction_annotation, junction_saturation, read_distribution, read_duplication," + echo "tin ]" + echo " Specify the RSeQC modules to run_wf (comma-separated list)" + echo "" + echo "MultiQC paramenters:" + echo " --multiqc_custom_config" + echo " type: file, file must exist" + echo " Custom multiqc configuration file" + echo "" + echo " --multiqc_title" + echo " type: string" + echo " Custom multiqc title" + echo "" + echo " --multiqc_methods_description" + echo " type: file, file must exist" + echo "" + echo "Output:" + echo " --output_fasta" + echo " type: file, output, file must exist" + echo " default: reference/genome.fasta" + echo "" + echo " --output_gtf" + echo " type: file, output, file must exist" + echo " default: reference/gene_annotation.gtf" + echo "" + echo " --output_transcript_fasta" + echo " type: file, output, file must exist" + echo " default: reference/transcriptome.fasta" + echo "" + echo " --output_gene_bed" + echo " type: file, output, file must exist" + echo " default: reference/gene_annotation.bed" + echo "" + echo " --output_star_index" + echo " type: file, output, file must exist" + echo " default: reference/index/STAR" + echo " Path to STAR index." + echo "" + echo " --output_salmon_index" + echo " type: file, output, file must exist" + echo " default: reference/index/Salmon" + echo " Path to Salmon index." + echo "" + echo " --output_bbsplit_index" + echo " type: file, output, file must exist" + echo " default: reference/index/BBSplit" + echo " Path to BBSplit index." + echo "" + echo " --output_kallisto_index" + echo " type: file, output, file must exist" + echo " default: reference/index/Kallisto" + echo " Path to Kallisto index." + echo "" + echo " --output_fastq_1" + echo " type: file, output" + echo " default: fastq/\${id}_r1.fastq.gz" + echo " Path to output directory" + echo "" + echo " --output_fastq_2" + echo " type: file, output" + echo " default: fastq/\${id}_r2.fastq.gz" + echo " Path to output directory" + echo "" + echo " --fastqc_html_1" + echo " type: file, output" + echo " default: fastqc_raw/\${id}_r1.fastqc.html" + echo " FastQC HTML report for read 1." + echo "" + echo " --fastqc_html_2" + echo " type: file, output" + echo " default: fastqc_raw/\${id}_r2.fastqc.html" + echo " FastQC HTML report for read 2." + echo "" + echo " --fastqc_zip_1" + echo " type: file, output" + echo " default: fastqc_raw/\${id}_r1.fastqc.zip" + echo " FastQC report archive for read 1." + echo "" + echo " --fastqc_zip_2" + echo " type: file, output" + echo " default: fastqc_raw/\${id}_r2.fastqc.zip" + echo " FastQC report archive for read 2." + echo "" + echo " --trim_html_1" + echo " type: file, output" + echo " default: fastqc_trim/\${id}_r1.trimmed_fastqc.html" + echo "" + echo " --trim_html_2" + echo " type: file, output" + echo " default: fastqc_trim/\${id}_r2.trimmed_fastqc.html" + echo "" + echo " --trim_zip_1" + echo " type: file, output" + echo " default: fastqc_trim/\${id}_r1.trimmed_fastqc.zip" + echo "" + echo " --trim_zip_2" + echo " type: file, output" + echo " default: fastqc_trim/\${id}_r2.trimmed_fastqc.zip" + echo "" + echo " --trim_log_1" + echo " type: file, output" + echo " default: trimgalore/\${id}_r1.trimming_report.txt" + echo "" + echo " --trim_log_2" + echo " type: file, output" + echo " default: trimgalore/\${id}_r2.trimming_report.txt" + echo "" + echo " --fastp_trim_json" + echo " type: file, output, file must exist" + echo " default: fastp/\$id_out.json" + echo " The fastp json format report file name" + echo "" + echo " --fastp_trim_html" + echo " type: file, output, file must exist" + echo " default: fastp/\$id_out.html" + echo " The fastp html format report file name" + echo "" + echo " --sortmerna_log" + echo " type: file, output" + echo " default: sortmerna/\$id.log" + echo " Sortmerna log file." + echo "" + echo " --star_alignment" + echo " type: file, output, file must exist" + echo " default: STAR/\$id" + echo "" + echo " --genome_bam_sorted" + echo " type: file, output, file must exist" + echo " default: STAR/genome_processed/\$id.genome.bam" + echo "" + echo " --genome_bam_index" + echo " type: file, output, file must exist" + echo " default: STAR/genome_processed/\$id.genome.bam.bai" + echo "" + echo " --transcriptome_bam" + echo " type: file, output, file must exist" + echo " default: STAR/transcriptome_processed/\$id.transcriptome.bam" + echo "" + echo " --transcriptome_bam_index" + echo " type: file, output, file must exist" + echo " default: STAR/transcriptome_processed/\$id.transcriptome.bam.bai" + echo "" + echo " --star_log" + echo " type: file, output, file must exist" + echo " default: STAR/log/\$id.log" + echo "" + echo " --genome_bam_stats" + echo " type: file, output, file must exist" + echo " default: samtools_stats/\$id.genome.stats" + echo "" + echo " --genome_bam_flagstat" + echo " type: file, output, file must exist" + echo " default: samtools_stats/\$id.genome.flagstat" + echo "" + echo " --genome_bam_idxstats" + echo " type: file, output, file must exist" + echo " default: samtools_stats/\$id.genome.idxstats" + echo "" + echo " --transcriptome_bam_stats" + echo " type: file, output, file must exist" + echo " default: samtools_stats/\$id.transcriptome.stats" + echo "" + echo " --transcriptome_bam_flagstat" + echo " type: file, output, file must exist" + echo " default: samtools_stats/\$id.transcriptome.flagstat" + echo "" + echo " --transcriptome_bam_idxstats" + echo " type: file, output, file must exist" + echo " default: samtools_stats/\$id.transcriptome.idxstats" + echo "" + echo " --salmon_quant_results" + echo " type: file, output, file must exist" + echo " default: STAR_Salmon/\$id" + echo "" + echo " --salmon_quant_results_file" + echo " type: file, output, file must exist" + echo " default: STAR_Salmon/\$id/quant.sf" + echo "" + echo " --pseudo_quant_results" + echo " type: file, output, file must exist" + echo " default: Pseudo_align_quant/\$id" + echo "" + echo " --rsem_counts_gene" + echo " type: file, output, file must exist" + echo " default: RSEM/\$id.genes.results" + echo " Expression counts on gene level" + echo "" + echo " --rsem_counts_transcripts" + echo " type: file, output, file must exist" + echo " default: RSEM/\$id.isoforms.results" + echo " Expression counts on transcript level" + echo "" + echo " --bam_star_rsem" + echo " type: file, output, file must exist" + echo " default: RSEM/\$id.STAR.genome.bam" + echo " BAM file generated by STAR (from RSEM)" + echo "" + echo " --bam_genome_rsem" + echo " type: file, output, file must exist" + echo " default: RSEM/\$id.genome.bam" + echo " Genome BAM file (from RSEM)" + echo "" + echo " --bam_transcript_rsem" + echo " type: file, output, file must exist" + echo " default: RSEM/\$id.transcript.bam" + echo " Transcript BAM file (from RSEM)" + echo "" + echo " --tpm_gene" + echo " type: file, output, file must exist" + echo " default: transcript_quantification/gene_tpm.tsv" + echo "" + echo " --counts_gene" + echo " type: file, output, file must exist" + echo " default: transcript_quantification/gene_counts.tsv" + echo "" + echo " --counts_gene_length_scaled" + echo " type: file, output, file must exist" + echo " default: transcript_quantification/gene_counts_length_scaled.tsv" + echo "" + echo " --counts_gene_scaled" + echo " type: file, output, file must exist" + echo " default: transcript_quantification/gene_counts_scaled.tsv" + echo "" + echo " --tpm_transcript" + echo " type: file, output, file must exist" + echo " default: transcript_quantification/transcript_tpm.tsv" + echo "" + echo " --counts_transcript" + echo " type: file, output, file must exist" + echo " default: transcript_quantification/transcript_counts.tsv" + echo "" + echo " --quant_merged_summarizedexperiment" + echo " type: file, output, file must exist" + echo " default: transcript_quantification/summarizedexperiment" + echo "" + echo " --markduplicates_metrics" + echo " type: file, output, file must exist" + echo " default: picard/\$id.MarkDuplicates.metrics.txt" + echo "" + echo " --stringtie_transcript_gtf" + echo " type: file, output, file must exist" + echo " default: stringtie/\$id.transcripts.gtf" + echo "" + echo " --stringtie_coverage_gtf" + echo " type: file, output, file must exist" + echo " default: stringtie/\$id.coverage.gtf" + echo "" + echo " --stringtie_abundance" + echo " type: file, output, file must exist" + echo " default: stringtie/\$id.gene_abundance.txt" + echo "" + echo " --stringtie_ballgown" + echo " type: file, output, file must exist" + echo " default: stringtie/\$id.ballgown" + echo "" + echo " --featurecounts" + echo " type: file, output, file must exist" + echo " default: featurecounts/\$id.featureCounts.txt" + echo "" + echo " --featurecounts_summary" + echo " type: file, output, file must exist" + echo " default: featurecounts/\$id.featureCounts.txt.summary" + echo "" + echo " --featurecounts_multiqc" + echo " type: file, output" + echo " default: featurecounts/\$id.featureCounts_mqc.tsv" + echo "" + echo " --featurecounts_rrna_multiqc" + echo " type: file, output" + echo " default: featurecounts/\$id.featureCounts_rrna_mqc.tsv" + echo "" + echo " --bedgraph_forward" + echo " type: file, output, file must exist" + echo " default: bedgraph/\$id.forward.bedgraph" + echo "" + echo " --bedgraph_reverse" + echo " type: file, output, file must exist" + echo " default: bedgraph/\$id.reverse.bedgraph" + echo "" + echo " --bigwig_forward" + echo " type: file, output, file must exist" + echo " default: bigwig/\$id.forward.bigwig" + echo "" + echo " --bigwig_reverse" + echo " type: file, output, file must exist" + echo " default: bigwig/\$id.reverse.bigwig" + echo "" + echo " --preseq_output" + echo " type: file, output, file must exist" + echo " default: preseq/\$id.lc_extrap.txt" + echo "" + echo " --bamstat_output" + echo " type: file, output, file must exist" + echo " default: RSeQC/bamstat/\$id.mapping_quality.txt" + echo " Path to output file (txt) of mapping quality statistics" + echo "" + echo " --strandedness_output" + echo " type: file, output, file must exist" + echo " default: RSeQC/inferexperiment/\$id.strandedness.txt" + echo " Path to output report (txt) of inferred strandedness" + echo "" + echo " --inner_dist_output_stats" + echo " type: file, output" + echo " default: RSeQC/innerdistance/\$id.inner_distance.stats" + echo " output file (txt) with summary statistics of inner distances of paired" + echo " reads" + echo "" + echo " --inner_dist_output_dist" + echo " type: file, output" + echo " default: RSeQC/innerdistance/txt/\$id.inner_distance.txt" + echo " output file (txt) with inner distances of all paired reads" + echo "" + echo " --inner_dist_output_freq" + echo " type: file, output" + echo " default: RSeQC/innerdistance/txt/\$id.inner_distance_freq.txt" + echo " output file (txt) with frequencies of inner distances of all paired" + echo " reads" + echo "" + echo " --inner_dist_output_plot" + echo " type: file, output" + echo " default: RSeQC/innerdistance/pdf/\$id.inner_distance_plot.pdf" + echo " output file (pdf) with histogram plot of of inner distances of all" + echo " paired reads" + echo "" + echo " --inner_dist_output_plot_r" + echo " type: file, output" + echo " default: RSeQC/innerdistance/rscript/\$id.inner_distance_plot.r" + echo " output file (R) with script of histogram plot of of inner distances of" + echo " all paired reads" + echo "" + echo " --junction_annotation_output_log" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionannotation/log/\$id.junction_annotation.log" + echo " output log of junction annotation script" + echo "" + echo " --junction_annotation_output_plot_r" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionannotation/rscript/\$id.junction_annotation_plot.r" + echo " R script to generate splice_junction and splice_events plot" + echo "" + echo " --junction_annotation_output_junction_bed" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionannotation/bed/\$id.junction_annotation.bed" + echo " junction annotation file (bed format)" + echo "" + echo " --junction_annotation_output_junction_interact" + echo " type: file, output, file must exist" + echo " default:" + echo "RSeQC/junctionannotation/bed/\$id.junction_annotation.Interact.bed" + echo " interact file (bed format) of junctions. Can be uploaded to UCSC genome" + echo " browser or converted to bigInteract (using bedToBigBed program) for" + echo " visualization." + echo "" + echo " --junction_annotation_output_junction_sheet" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionannotation/xls/\$id.junction_annotation.xls" + echo " junction annotation file (xls format)" + echo "" + echo " --junction_annotation_output_splice_events_plot" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionannotation/pdf/\$id.splice_events.pdf" + echo " plot of splice events (pdf)" + echo "" + echo " --junction_annotation_output_splice_junctions_plot" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionannotation/pdf/\$id.splice_junctions_plot.pdf" + echo " plot of junctions (pdf)" + echo "" + echo " --junction_saturation_output_plot_r" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionsaturation/rscript/\$id.junction_saturation_plot.r" + echo " r script to generate junction_saturation_plot plot" + echo "" + echo " --junction_saturation_output_plot" + echo " type: file, output, file must exist" + echo " default: RSeQC/junctionsaturation/pdf/\$id.junction_saturation_plot.pdf" + echo " plot of junction saturation (pdf" + echo "" + echo " --read_distribution_output" + echo " type: file, output, file must exist" + echo " default: RSeQC/readdistribution/\$id.read_distribution.txt" + echo " output file (txt) of read distribution analysis." + echo "" + echo " --read_duplication_output_duplication_rate_plot_r" + echo " type: file, output, file must exist" + echo " default: RSeQC/readduplication/rscrpt/\$id.duplication_rate_plot.r" + echo " R script for generating duplication rate plot" + echo "" + echo " --read_duplication_output_duplication_rate_plot" + echo " type: file, output, file must exist" + echo " default: RSeQC/readduplication/pdf/\$id.duplication_rate_plot.pdf" + echo " duplication rate plot (pdf)" + echo "" + echo " --read_duplication_output_duplication_rate_mapping" + echo " type: file, output, file must exist" + echo " default: RSeQC/readduplication/xls/\$id.duplication_rate_mapping.xls" + echo " Summary of mapping-based read duplication" + echo "" + echo " --read_duplication_output_duplication_rate_sequence" + echo " type: file, output, file must exist" + echo " default: RSeQC/readduplication/xls/\$id.duplication_rate_sequencing.xls" + echo " Summary of sequencing-based read duplication" + echo "" + echo " --tin_output_summary" + echo " type: file, output, file must exist" + echo " default: RSeQC/tin/txt/\$id.tin_summary.txt" + echo " summary statistics (txt) of calculated TIN metrics" + echo "" + echo " --tin_output_metrics" + echo " type: file, output, file must exist" + echo " default: RSeQC/tin/xls/\$id.tin.xls" + echo " file with TIN metrics (xls)" + echo "" + echo " --dupradar_output_dupmatrix" + echo " type: file, output, file must exist" + echo " default: dupradar/gene_data/\$id.dup_matrix.txt" + echo " path to output file (txt) of duplicate tag counts" + echo "" + echo " --dupradar_output_dup_intercept_mqc" + echo " type: file, output, file must exist" + echo " default: dupradar/mqc_intercept/\$id.dup_intercept_mqc.txt" + echo " path to output file (txt) of multiqc intercept value DupRadar" + echo "" + echo " --dupradar_output_duprate_exp_boxplot" + echo " type: file, output, file must exist" + echo " default: dupradar/box_plot/\$id.duprate_exp_boxplot.pdf" + echo " path to output file (pdf) of distribution of expression box plot" + echo "" + echo " --dupradar_output_duprate_exp_densplot" + echo " type: file, output, file must exist" + echo " default: dupradar/scatter_plot/\$id.duprate_exp_densityplot.pdf" + echo " path to output file (pdf) of 2D density scatter plot of duplicate tag" + echo " counts" + echo "" + echo " --dupradar_output_duprate_exp_denscurve_mqc" + echo " type: file, output, file must exist" + echo " default: dupradar/density_curve/\$id.duprate_exp_density_curve_mqc.pdf" + echo " path to output file (pdf) of density curve of gene duplication multiqc" + echo "" + echo " --dupradar_output_expression_histogram" + echo " type: file, output, file must exist" + echo " default: dupradar/histogram/\$id.expression_hist.pdf" + echo " path to output file (pdf) of distribution of RPK values per gene" + echo " histogram" + echo "" + echo " --dupradar_output_intercept_slope" + echo " type: file, output, file must exist" + echo " default: dupradar/intercept_slope/\$id.intercept_slope.txt" + echo "" + echo " --qualimap_qc_report" + echo " type: file, output, file must exist" + echo " default: Qualimap/\$id.rnaseq_qc_results.txt" + echo " Text file containing the RNAseq QC results." + echo "" + echo " --qualimap_counts" + echo " type: file, output, file must exist" + echo " default: Qualimap/\$id.counts.txt" + echo " Output file for computed counts." + echo "" + echo " --qualimap_report" + echo " type: file, output, file must exist" + echo " default: Qualimap/\$id.report.html" + echo " Report output file. Supported formats are PDF or HTML." + echo "" + echo " --deseq2_output" + echo " type: file, output, file must exist" + echo " default: deseq2_qc" + echo "" + echo " --deseq2_output_pseudo" + echo " type: file, output, file must exist" + echo " default: deseq2_qc_pseudo" + echo "" + echo " --multiqc_report" + echo " type: file, output, file must exist" + echo " default: multiqc/multiqc_report.html" + echo "" + echo " --multiqc_data" + echo " type: file, output, file must exist" + echo " default: multiqc/multiqc_data" + echo "" + echo " --multiqc_plots" + echo " type: file, output, file must exist" + echo " default: multiqc/multiqc_plots" + echo "" + echo " --multiqc_versions" + echo " type: file, output, file must exist" + echo "" + echo " --pseudo_counts_gene" + echo " type: file, output, file must exist" + echo " default: pseudo_alignment_quantification/gene_counts.tsv" + echo "" + echo " --pseudo_counts_gene_length_scaled" + echo " type: file, output, file must exist" + echo " default: pseudo_alignment_quantification/gene_counts_length_scaled.tsv" + echo "" + echo " --pseudo_counts_gene_scaled" + echo " type: file, output, file must exist" + echo " default: pseudo_alignment_quantification/gene_counts_scaled.tsv" + echo "" + echo " --pseudo_tpm_gene" + echo " type: file, output, file must exist" + echo " default: pseudo_alignment_quantification/gene_tpm.tsv" + echo "" + echo " --pseudo_tpm_transcript" + echo " type: file, output, file must exist" + echo " default: pseudo_alignment_quantification/transcript_tpm.tsv" + echo "" + echo " --pseudo_counts_transcript" + echo " type: file, output, file must exist" + echo " default: pseudo_alignment_quantification/transcript_counts.tsv" + echo "" + echo " --pseudo_quant_merged_summarizedexperiment" + echo " type: file, output, file must exist" + echo " default:" + echo "pseudo_alignment_quantification/quant_merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "rnaseq v0.3.0" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastq_1) + if [ -z "$VIASH_PAR_FASTQ_1" ]; then + VIASH_PAR_FASTQ_1="$2" + else + VIASH_PAR_FASTQ_1="$VIASH_PAR_FASTQ_1;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_1=*) + if [ -z "$VIASH_PAR_FASTQ_1" ]; then + VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FASTQ_1="$VIASH_PAR_FASTQ_1;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --fastq_2) + if [ -z "$VIASH_PAR_FASTQ_2" ]; then + VIASH_PAR_FASTQ_2="$2" + else + VIASH_PAR_FASTQ_2="$VIASH_PAR_FASTQ_2;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastq_2=*) + if [ -z "$VIASH_PAR_FASTQ_2" ]; then + VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FASTQ_2="$VIASH_PAR_FASTQ_2;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --strandedness) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness=*) + [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fasta) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fasta=*) + [ -n "$VIASH_PAR_FASTA" ] && ViashError Bad arguments for option \'--fasta=*\': \'$VIASH_PAR_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf=*) + [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gff) + [ -n "$VIASH_PAR_GFF" ] && ViashError Bad arguments for option \'--gff\': \'$VIASH_PAR_GFF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GFF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gff. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gff=*) + [ -n "$VIASH_PAR_GFF" ] && ViashError Bad arguments for option \'--gff=*\': \'$VIASH_PAR_GFF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GFF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --additional_fasta) + [ -n "$VIASH_PAR_ADDITIONAL_FASTA" ] && ViashError Bad arguments for option \'--additional_fasta\': \'$VIASH_PAR_ADDITIONAL_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADDITIONAL_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --additional_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --additional_fasta=*) + [ -n "$VIASH_PAR_ADDITIONAL_FASTA" ] && ViashError Bad arguments for option \'--additional_fasta=*\': \'$VIASH_PAR_ADDITIONAL_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADDITIONAL_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcript_fasta) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcript_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcript_fasta=*) + [ -n "$VIASH_PAR_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--transcript_fasta=*\': \'$VIASH_PAR_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gene_bed) + [ -n "$VIASH_PAR_GENE_BED" ] && ViashError Bad arguments for option \'--gene_bed\': \'$VIASH_PAR_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_bed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gene_bed=*) + [ -n "$VIASH_PAR_GENE_BED" ] && ViashError Bad arguments for option \'--gene_bed=*\': \'$VIASH_PAR_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_BED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --splicesites) + [ -n "$VIASH_PAR_SPLICESITES" ] && ViashError Bad arguments for option \'--splicesites\': \'$VIASH_PAR_SPLICESITES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPLICESITES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --splicesites. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --splicesites=*) + [ -n "$VIASH_PAR_SPLICESITES" ] && ViashError Bad arguments for option \'--splicesites=*\': \'$VIASH_PAR_SPLICESITES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPLICESITES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_index) + [ -n "$VIASH_PAR_STAR_INDEX" ] && ViashError Bad arguments for option \'--star_index\': \'$VIASH_PAR_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_index=*) + [ -n "$VIASH_PAR_STAR_INDEX" ] && ViashError Bad arguments for option \'--star_index=*\': \'$VIASH_PAR_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_index) + [ -n "$VIASH_PAR_RSEM_INDEX" ] && ViashError Bad arguments for option \'--rsem_index\': \'$VIASH_PAR_RSEM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_index=*) + [ -n "$VIASH_PAR_RSEM_INDEX" ] && ViashError Bad arguments for option \'--rsem_index=*\': \'$VIASH_PAR_RSEM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_index) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_index=*) + [ -n "$VIASH_PAR_SALMON_INDEX" ] && ViashError Bad arguments for option \'--salmon_index=*\': \'$VIASH_PAR_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_index) + [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_index=*) + [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index=*\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gencode) + [ -n "$VIASH_PAR_GENCODE" ] && ViashError Bad arguments for option \'--gencode\': \'$VIASH_PAR_GENCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENCODE=true + shift 1 + ;; + --gtf_extra_attributes) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_extra_attributes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_extra_attributes=*) + [ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes=*\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_EXTRA_ATTRIBUTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gtf_group_features) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_group_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gtf_group_features=*) + [ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features=*\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GTF_GROUP_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_group_type) + [ -n "$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_group_type\': \'$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_GROUP_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_group_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_group_type=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_group_type=*\': \'$VIASH_PAR_FEATURECOUNTS_GROUP_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_GROUP_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_feature_type) + [ -n "$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_feature_type\': \'$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_feature_type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_feature_type=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE" ] && ViashError Bad arguments for option \'--featurecounts_feature_type=*\': \'$VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_sjdb_gtf_feature_exon) + [ -n "$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON" ] && ViashError Bad arguments for option \'--star_sjdb_gtf_feature_exon\': \'$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_sjdb_gtf_feature_exon. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_sjdb_gtf_feature_exon=*) + [ -n "$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON" ] && ViashError Bad arguments for option \'--star_sjdb_gtf_feature_exon=*\': \'$VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_SJDB_GTF_FEATURE_EXON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trimmer) + [ -n "$VIASH_PAR_TRIMMER" ] && ViashError Bad arguments for option \'--trimmer\': \'$VIASH_PAR_TRIMMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIMMER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trimmer=*) + [ -n "$VIASH_PAR_TRIMMER" ] && ViashError Bad arguments for option \'--trimmer=*\': \'$VIASH_PAR_TRIMMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIMMER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_trimmed_reads) + [ -n "$VIASH_PAR_MIN_TRIMMED_READS" ] && ViashError Bad arguments for option \'--min_trimmed_reads\': \'$VIASH_PAR_MIN_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_TRIMMED_READS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_trimmed_reads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_trimmed_reads=*) + [ -n "$VIASH_PAR_MIN_TRIMMED_READS" ] && ViashError Bad arguments for option \'--min_trimmed_reads=*\': \'$VIASH_PAR_MIN_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_TRIMMED_READS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bbsplit_fasta_list) + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST="$2" + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_fasta_list. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bbsplit_fasta_list=*) + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --bbsplit_index) + [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bbsplit_index=*) + [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index=*\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BBSPLIT_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --remove_ribo_rna) + [ -n "$VIASH_PAR_REMOVE_RIBO_RNA" ] && ViashError Bad arguments for option \'--remove_ribo_rna\': \'$VIASH_PAR_REMOVE_RIBO_RNA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REMOVE_RIBO_RNA=true + shift 1 + ;; + --ribo_database_manifest) + [ -n "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && ViashError Bad arguments for option \'--ribo_database_manifest\': \'$VIASH_PAR_RIBO_DATABASE_MANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RIBO_DATABASE_MANIFEST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --ribo_database_manifest. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --ribo_database_manifest=*) + [ -n "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && ViashError Bad arguments for option \'--ribo_database_manifest=*\': \'$VIASH_PAR_RIBO_DATABASE_MANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RIBO_DATABASE_MANIFEST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --with_umi) + [ -n "$VIASH_PAR_WITH_UMI" ] && ViashError Bad arguments for option \'--with_umi\': \'$VIASH_PAR_WITH_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITH_UMI=true + shift 1 + ;; + --umitools_extract_method) + [ -n "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ] && ViashError Bad arguments for option \'--umitools_extract_method\': \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_EXTRACT_METHOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_extract_method. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_extract_method=*) + [ -n "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ] && ViashError Bad arguments for option \'--umitools_extract_method=*\': \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_EXTRACT_METHOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_bc_pattern) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN" ] && ViashError Bad arguments for option \'--umitools_bc_pattern\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_bc_pattern. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_bc_pattern=*) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN" ] && ViashError Bad arguments for option \'--umitools_bc_pattern=*\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_bc_pattern2) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN2" ] && ViashError Bad arguments for option \'--umitools_bc_pattern2\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_bc_pattern2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_bc_pattern2=*) + [ -n "$VIASH_PAR_UMITOOLS_BC_PATTERN2" ] && ViashError Bad arguments for option \'--umitools_bc_pattern2=*\': \'$VIASH_PAR_UMITOOLS_BC_PATTERN2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_BC_PATTERN2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umi_discard_read) + [ -n "$VIASH_PAR_UMI_DISCARD_READ" ] && ViashError Bad arguments for option \'--umi_discard_read\': \'$VIASH_PAR_UMI_DISCARD_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMI_DISCARD_READ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umi_discard_read. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umi_discard_read=*) + [ -n "$VIASH_PAR_UMI_DISCARD_READ" ] && ViashError Bad arguments for option \'--umi_discard_read=*\': \'$VIASH_PAR_UMI_DISCARD_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMI_DISCARD_READ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_umi_separator) + [ -n "$VIASH_PAR_UMITOOLS_UMI_SEPARATOR" ] && ViashError Bad arguments for option \'--umitools_umi_separator\': \'$VIASH_PAR_UMITOOLS_UMI_SEPARATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_UMI_SEPARATOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_umi_separator. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_umi_separator=*) + [ -n "$VIASH_PAR_UMITOOLS_UMI_SEPARATOR" ] && ViashError Bad arguments for option \'--umitools_umi_separator=*\': \'$VIASH_PAR_UMITOOLS_UMI_SEPARATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_UMI_SEPARATOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umitools_grouping_method) + [ -n "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ] && ViashError Bad arguments for option \'--umitools_grouping_method\': \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_GROUPING_METHOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_grouping_method. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --umitools_grouping_method=*) + [ -n "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ] && ViashError Bad arguments for option \'--umitools_grouping_method=*\': \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMITOOLS_GROUPING_METHOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --umi_dedup_stats) + [ -n "$VIASH_PAR_UMI_DEDUP_STATS" ] && ViashError Bad arguments for option \'--umi_dedup_stats\': \'$VIASH_PAR_UMI_DEDUP_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UMI_DEDUP_STATS=true + shift 1 + ;; + --aligner) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --aligner=*) + [ -n "$VIASH_PAR_ALIGNER" ] && ViashError Bad arguments for option \'--aligner=*\': \'$VIASH_PAR_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER" ] && ViashError Bad arguments for option \'--pseudo_aligner=*\': \'$VIASH_PAR_PSEUDO_ALIGNER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_aligner_kmer_size) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ] && ViashError Bad arguments for option \'--pseudo_aligner_kmer_size\': \'$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner_kmer_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_aligner_kmer_size=*) + [ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ] && ViashError Bad arguments for option \'--pseudo_aligner_kmer_size=*\': \'$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_quant_fragment_length) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_quant_fragment_length. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_quant_fragment_length=*) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length=*\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --kallisto_quant_fragment_length_sd) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length_sd\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_quant_fragment_length_sd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --kallisto_quant_fragment_length_sd=*) + [ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ] && ViashError Bad arguments for option \'--kallisto_quant_fragment_length_sd=*\': \'$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_csi_index) + [ -n "$VIASH_PAR_BAM_CSI_INDEX" ] && ViashError Bad arguments for option \'--bam_csi_index\': \'$VIASH_PAR_BAM_CSI_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_CSI_INDEX=true + shift 1 + ;; + --salmon_quant_libtype) + [ -n "$VIASH_PAR_SALMON_QUANT_LIBTYPE" ] && ViashError Bad arguments for option \'--salmon_quant_libtype\': \'$VIASH_PAR_SALMON_QUANT_LIBTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_LIBTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_libtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_quant_libtype=*) + [ -n "$VIASH_PAR_SALMON_QUANT_LIBTYPE" ] && ViashError Bad arguments for option \'--salmon_quant_libtype=*\': \'$VIASH_PAR_SALMON_QUANT_LIBTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_LIBTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_mapped_reads) + [ -n "$VIASH_PAR_MIN_MAPPED_READS" ] && ViashError Bad arguments for option \'--min_mapped_reads\': \'$VIASH_PAR_MIN_MAPPED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_MAPPED_READS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_mapped_reads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_mapped_reads=*) + [ -n "$VIASH_PAR_MIN_MAPPED_READS" ] && ViashError Bad arguments for option \'--min_mapped_reads=*\': \'$VIASH_PAR_MIN_MAPPED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_MAPPED_READS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_ignore_gtf) + [ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ] && ViashError Bad arguments for option \'--stringtie_ignore_gtf\': \'$VIASH_PAR_STRINGTIE_IGNORE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_IGNORE_GTF=true + shift 1 + ;; + --extra_stringtie_args) + [ -n "$VIASH_PAR_EXTRA_STRINGTIE_ARGS" ] && ViashError Bad arguments for option \'--extra_stringtie_args\': \'$VIASH_PAR_EXTRA_STRINGTIE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_STRINGTIE_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_stringtie_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_stringtie_args=*) + [ -n "$VIASH_PAR_EXTRA_STRINGTIE_ARGS" ] && ViashError Bad arguments for option \'--extra_stringtie_args=*\': \'$VIASH_PAR_EXTRA_STRINGTIE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_STRINGTIE_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --save_unaligned) + [ -n "$VIASH_PAR_SAVE_UNALIGNED" ] && ViashError Bad arguments for option \'--save_unaligned\': \'$VIASH_PAR_SAVE_UNALIGNED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAVE_UNALIGNED=true + shift 1 + ;; + --save_align_intermeds) + [ -n "$VIASH_PAR_SAVE_ALIGN_INTERMEDS" ] && ViashError Bad arguments for option \'--save_align_intermeds\': \'$VIASH_PAR_SAVE_ALIGN_INTERMEDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAVE_ALIGN_INTERMEDS=true + shift 1 + ;; + --skip_alignment) + [ -n "$VIASH_PAR_SKIP_ALIGNMENT" ] && ViashError Bad arguments for option \'--skip_alignment\': \'$VIASH_PAR_SKIP_ALIGNMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_ALIGNMENT=true + shift 1 + ;; + --skip_pseudo_alignment) + [ -n "$VIASH_PAR_SKIP_PSEUDO_ALIGNMENT" ] && ViashError Bad arguments for option \'--skip_pseudo_alignment\': \'$VIASH_PAR_SKIP_PSEUDO_ALIGNMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_PSEUDO_ALIGNMENT=true + shift 1 + ;; + --skip_fastqc) + [ -n "$VIASH_PAR_SKIP_FASTQC" ] && ViashError Bad arguments for option \'--skip_fastqc\': \'$VIASH_PAR_SKIP_FASTQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_FASTQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_fastqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_fastqc=*) + [ -n "$VIASH_PAR_SKIP_FASTQC" ] && ViashError Bad arguments for option \'--skip_fastqc=*\': \'$VIASH_PAR_SKIP_FASTQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_FASTQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_trimming) + [ -n "$VIASH_PAR_SKIP_TRIMMING" ] && ViashError Bad arguments for option \'--skip_trimming\': \'$VIASH_PAR_SKIP_TRIMMING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_TRIMMING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_trimming. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_trimming=*) + [ -n "$VIASH_PAR_SKIP_TRIMMING" ] && ViashError Bad arguments for option \'--skip_trimming=*\': \'$VIASH_PAR_SKIP_TRIMMING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_TRIMMING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_umi_extract) + [ -n "$VIASH_PAR_SKIP_UMI_EXTRACT" ] && ViashError Bad arguments for option \'--skip_umi_extract\': \'$VIASH_PAR_SKIP_UMI_EXTRACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_UMI_EXTRACT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --skip_umi_extract. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --skip_umi_extract=*) + [ -n "$VIASH_PAR_SKIP_UMI_EXTRACT" ] && ViashError Bad arguments for option \'--skip_umi_extract=*\': \'$VIASH_PAR_SKIP_UMI_EXTRACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_UMI_EXTRACT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --skip_qc) + [ -n "$VIASH_PAR_SKIP_QC" ] && ViashError Bad arguments for option \'--skip_qc\': \'$VIASH_PAR_SKIP_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QC=true + shift 1 + ;; + --skip_markduplicates) + [ -n "$VIASH_PAR_SKIP_MARKDUPLICATES" ] && ViashError Bad arguments for option \'--skip_markduplicates\': \'$VIASH_PAR_SKIP_MARKDUPLICATES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_MARKDUPLICATES=true + shift 1 + ;; + --skip_stringtie) + [ -n "$VIASH_PAR_SKIP_STRINGTIE" ] && ViashError Bad arguments for option \'--skip_stringtie\': \'$VIASH_PAR_SKIP_STRINGTIE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_STRINGTIE=true + shift 1 + ;; + --skip_biotype_qc) + [ -n "$VIASH_PAR_SKIP_BIOTYPE_QC" ] && ViashError Bad arguments for option \'--skip_biotype_qc\': \'$VIASH_PAR_SKIP_BIOTYPE_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BIOTYPE_QC=true + shift 1 + ;; + --skip_bigwig) + [ -n "$VIASH_PAR_SKIP_BIGWIG" ] && ViashError Bad arguments for option \'--skip_bigwig\': \'$VIASH_PAR_SKIP_BIGWIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_BIGWIG=true + shift 1 + ;; + --skip_preseq) + [ -n "$VIASH_PAR_SKIP_PRESEQ" ] && ViashError Bad arguments for option \'--skip_preseq\': \'$VIASH_PAR_SKIP_PRESEQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_PRESEQ=true + shift 1 + ;; + --skip_dupradar) + [ -n "$VIASH_PAR_SKIP_DUPRADAR" ] && ViashError Bad arguments for option \'--skip_dupradar\': \'$VIASH_PAR_SKIP_DUPRADAR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_DUPRADAR=true + shift 1 + ;; + --skip_qualimap) + [ -n "$VIASH_PAR_SKIP_QUALIMAP" ] && ViashError Bad arguments for option \'--skip_qualimap\': \'$VIASH_PAR_SKIP_QUALIMAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_QUALIMAP=true + shift 1 + ;; + --skip_rseqc) + [ -n "$VIASH_PAR_SKIP_RSEQC" ] && ViashError Bad arguments for option \'--skip_rseqc\': \'$VIASH_PAR_SKIP_RSEQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_RSEQC=true + shift 1 + ;; + --skip_multiqc) + [ -n "$VIASH_PAR_SKIP_MULTIQC" ] && ViashError Bad arguments for option \'--skip_multiqc\': \'$VIASH_PAR_SKIP_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SKIP_MULTIQC=true + shift 1 + ;; + --extra_picard_args) + [ -n "$VIASH_PAR_EXTRA_PICARD_ARGS" ] && ViashError Bad arguments for option \'--extra_picard_args\': \'$VIASH_PAR_EXTRA_PICARD_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PICARD_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_picard_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_picard_args=*) + [ -n "$VIASH_PAR_EXTRA_PICARD_ARGS" ] && ViashError Bad arguments for option \'--extra_picard_args=*\': \'$VIASH_PAR_EXTRA_PICARD_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PICARD_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --extra_preseq_args) + [ -n "$VIASH_PAR_EXTRA_PRESEQ_ARGS" ] && ViashError Bad arguments for option \'--extra_preseq_args\': \'$VIASH_PAR_EXTRA_PRESEQ_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PRESEQ_ARGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_preseq_args. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --extra_preseq_args=*) + [ -n "$VIASH_PAR_EXTRA_PRESEQ_ARGS" ] && ViashError Bad arguments for option \'--extra_preseq_args=*\': \'$VIASH_PAR_EXTRA_PRESEQ_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXTRA_PRESEQ_ARGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --deseq2_vst) + [ -n "$VIASH_PAR_DESEQ2_VST" ] && ViashError Bad arguments for option \'--deseq2_vst\': \'$VIASH_PAR_DESEQ2_VST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_VST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --deseq2_vst. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --deseq2_vst=*) + [ -n "$VIASH_PAR_DESEQ2_VST" ] && ViashError Bad arguments for option \'--deseq2_vst=*\': \'$VIASH_PAR_DESEQ2_VST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_VST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rseqc_modules) + if [ -z "$VIASH_PAR_RSEQC_MODULES" ]; then + VIASH_PAR_RSEQC_MODULES="$2" + else + VIASH_PAR_RSEQC_MODULES="$VIASH_PAR_RSEQC_MODULES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rseqc_modules. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rseqc_modules=*) + if [ -z "$VIASH_PAR_RSEQC_MODULES" ]; then + VIASH_PAR_RSEQC_MODULES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_RSEQC_MODULES="$VIASH_PAR_RSEQC_MODULES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --multiqc_custom_config) + [ -n "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ] && ViashError Bad arguments for option \'--multiqc_custom_config\': \'$VIASH_PAR_MULTIQC_CUSTOM_CONFIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_CUSTOM_CONFIG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_custom_config. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_custom_config=*) + [ -n "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ] && ViashError Bad arguments for option \'--multiqc_custom_config=*\': \'$VIASH_PAR_MULTIQC_CUSTOM_CONFIG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_CUSTOM_CONFIG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_title) + [ -n "$VIASH_PAR_MULTIQC_TITLE" ] && ViashError Bad arguments for option \'--multiqc_title\': \'$VIASH_PAR_MULTIQC_TITLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_TITLE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_title. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_title=*) + [ -n "$VIASH_PAR_MULTIQC_TITLE" ] && ViashError Bad arguments for option \'--multiqc_title=*\': \'$VIASH_PAR_MULTIQC_TITLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_TITLE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_methods_description) + [ -n "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ] && ViashError Bad arguments for option \'--multiqc_methods_description\': \'$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_METHODS_DESCRIPTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_methods_description. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_methods_description=*) + [ -n "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ] && ViashError Bad arguments for option \'--multiqc_methods_description=*\': \'$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_METHODS_DESCRIPTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_fasta) + [ -n "$VIASH_PAR_OUTPUT_FASTA" ] && ViashError Bad arguments for option \'--output_fasta\': \'$VIASH_PAR_OUTPUT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_fasta=*) + [ -n "$VIASH_PAR_OUTPUT_FASTA" ] && ViashError Bad arguments for option \'--output_fasta=*\': \'$VIASH_PAR_OUTPUT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_gtf) + [ -n "$VIASH_PAR_OUTPUT_GTF" ] && ViashError Bad arguments for option \'--output_gtf\': \'$VIASH_PAR_OUTPUT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_gtf=*) + [ -n "$VIASH_PAR_OUTPUT_GTF" ] && ViashError Bad arguments for option \'--output_gtf=*\': \'$VIASH_PAR_OUTPUT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_transcript_fasta) + [ -n "$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--output_transcript_fasta\': \'$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_transcript_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_transcript_fasta=*) + [ -n "$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA" ] && ViashError Bad arguments for option \'--output_transcript_fasta=*\': \'$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_gene_bed) + [ -n "$VIASH_PAR_OUTPUT_GENE_BED" ] && ViashError Bad arguments for option \'--output_gene_bed\': \'$VIASH_PAR_OUTPUT_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_GENE_BED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_gene_bed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_gene_bed=*) + [ -n "$VIASH_PAR_OUTPUT_GENE_BED" ] && ViashError Bad arguments for option \'--output_gene_bed=*\': \'$VIASH_PAR_OUTPUT_GENE_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_GENE_BED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_star_index) + [ -n "$VIASH_PAR_OUTPUT_STAR_INDEX" ] && ViashError Bad arguments for option \'--output_star_index\': \'$VIASH_PAR_OUTPUT_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_STAR_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_star_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_star_index=*) + [ -n "$VIASH_PAR_OUTPUT_STAR_INDEX" ] && ViashError Bad arguments for option \'--output_star_index=*\': \'$VIASH_PAR_OUTPUT_STAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_STAR_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_salmon_index) + [ -n "$VIASH_PAR_OUTPUT_SALMON_INDEX" ] && ViashError Bad arguments for option \'--output_salmon_index\': \'$VIASH_PAR_OUTPUT_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_SALMON_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_salmon_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_salmon_index=*) + [ -n "$VIASH_PAR_OUTPUT_SALMON_INDEX" ] && ViashError Bad arguments for option \'--output_salmon_index=*\': \'$VIASH_PAR_OUTPUT_SALMON_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_SALMON_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_bbsplit_index) + [ -n "$VIASH_PAR_OUTPUT_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--output_bbsplit_index\': \'$VIASH_PAR_OUTPUT_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BBSPLIT_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bbsplit_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_bbsplit_index=*) + [ -n "$VIASH_PAR_OUTPUT_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--output_bbsplit_index=*\': \'$VIASH_PAR_OUTPUT_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BBSPLIT_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_kallisto_index) + [ -n "$VIASH_PAR_OUTPUT_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--output_kallisto_index\': \'$VIASH_PAR_OUTPUT_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_KALLISTO_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_kallisto_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_kallisto_index=*) + [ -n "$VIASH_PAR_OUTPUT_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--output_kallisto_index=*\': \'$VIASH_PAR_OUTPUT_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_KALLISTO_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_fastq_1) + [ -n "$VIASH_PAR_OUTPUT_FASTQ_1" ] && ViashError Bad arguments for option \'--output_fastq_1\': \'$VIASH_PAR_OUTPUT_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTQ_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fastq_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_fastq_1=*) + [ -n "$VIASH_PAR_OUTPUT_FASTQ_1" ] && ViashError Bad arguments for option \'--output_fastq_1=*\': \'$VIASH_PAR_OUTPUT_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTQ_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_fastq_2) + [ -n "$VIASH_PAR_OUTPUT_FASTQ_2" ] && ViashError Bad arguments for option \'--output_fastq_2\': \'$VIASH_PAR_OUTPUT_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTQ_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fastq_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_fastq_2=*) + [ -n "$VIASH_PAR_OUTPUT_FASTQ_2" ] && ViashError Bad arguments for option \'--output_fastq_2=*\': \'$VIASH_PAR_OUTPUT_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTQ_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_html_1) + [ -n "$VIASH_PAR_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--fastqc_html_1\': \'$VIASH_PAR_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_html_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_html_1=*) + [ -n "$VIASH_PAR_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--fastqc_html_1=*\': \'$VIASH_PAR_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_html_2) + [ -n "$VIASH_PAR_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--fastqc_html_2\': \'$VIASH_PAR_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_html_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_html_2=*) + [ -n "$VIASH_PAR_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--fastqc_html_2=*\': \'$VIASH_PAR_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_HTML_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_zip_1) + [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_zip_1=*) + [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1=*\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastqc_zip_2) + [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastqc_zip_2=*) + [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2=*\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTQC_ZIP_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_html_1) + [ -n "$VIASH_PAR_TRIM_HTML_1" ] && ViashError Bad arguments for option \'--trim_html_1\': \'$VIASH_PAR_TRIM_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_html_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_html_1=*) + [ -n "$VIASH_PAR_TRIM_HTML_1" ] && ViashError Bad arguments for option \'--trim_html_1=*\': \'$VIASH_PAR_TRIM_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_html_2) + [ -n "$VIASH_PAR_TRIM_HTML_2" ] && ViashError Bad arguments for option \'--trim_html_2\': \'$VIASH_PAR_TRIM_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_html_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_html_2=*) + [ -n "$VIASH_PAR_TRIM_HTML_2" ] && ViashError Bad arguments for option \'--trim_html_2=*\': \'$VIASH_PAR_TRIM_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_HTML_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_zip_1) + [ -n "$VIASH_PAR_TRIM_ZIP_1" ] && ViashError Bad arguments for option \'--trim_zip_1\': \'$VIASH_PAR_TRIM_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_zip_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_zip_1=*) + [ -n "$VIASH_PAR_TRIM_ZIP_1" ] && ViashError Bad arguments for option \'--trim_zip_1=*\': \'$VIASH_PAR_TRIM_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_zip_2) + [ -n "$VIASH_PAR_TRIM_ZIP_2" ] && ViashError Bad arguments for option \'--trim_zip_2\': \'$VIASH_PAR_TRIM_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_zip_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_zip_2=*) + [ -n "$VIASH_PAR_TRIM_ZIP_2" ] && ViashError Bad arguments for option \'--trim_zip_2=*\': \'$VIASH_PAR_TRIM_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_ZIP_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_log_1) + [ -n "$VIASH_PAR_TRIM_LOG_1" ] && ViashError Bad arguments for option \'--trim_log_1\': \'$VIASH_PAR_TRIM_LOG_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_1="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_log_1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_log_1=*) + [ -n "$VIASH_PAR_TRIM_LOG_1" ] && ViashError Bad arguments for option \'--trim_log_1=*\': \'$VIASH_PAR_TRIM_LOG_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_1=$(ViashRemoveFlags "$1") + shift 1 + ;; + --trim_log_2) + [ -n "$VIASH_PAR_TRIM_LOG_2" ] && ViashError Bad arguments for option \'--trim_log_2\': \'$VIASH_PAR_TRIM_LOG_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_2="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_log_2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --trim_log_2=*) + [ -n "$VIASH_PAR_TRIM_LOG_2" ] && ViashError Bad arguments for option \'--trim_log_2=*\': \'$VIASH_PAR_TRIM_LOG_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRIM_LOG_2=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastp_trim_json) + [ -n "$VIASH_PAR_FASTP_TRIM_JSON" ] && ViashError Bad arguments for option \'--fastp_trim_json\': \'$VIASH_PAR_FASTP_TRIM_JSON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTP_TRIM_JSON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastp_trim_json. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastp_trim_json=*) + [ -n "$VIASH_PAR_FASTP_TRIM_JSON" ] && ViashError Bad arguments for option \'--fastp_trim_json=*\': \'$VIASH_PAR_FASTP_TRIM_JSON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTP_TRIM_JSON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fastp_trim_html) + [ -n "$VIASH_PAR_FASTP_TRIM_HTML" ] && ViashError Bad arguments for option \'--fastp_trim_html\': \'$VIASH_PAR_FASTP_TRIM_HTML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTP_TRIM_HTML="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastp_trim_html. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fastp_trim_html=*) + [ -n "$VIASH_PAR_FASTP_TRIM_HTML" ] && ViashError Bad arguments for option \'--fastp_trim_html=*\': \'$VIASH_PAR_FASTP_TRIM_HTML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FASTP_TRIM_HTML=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sortmerna_log) + [ -n "$VIASH_PAR_SORTMERNA_LOG" ] && ViashError Bad arguments for option \'--sortmerna_log\': \'$VIASH_PAR_SORTMERNA_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_LOG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sortmerna_log. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sortmerna_log=*) + [ -n "$VIASH_PAR_SORTMERNA_LOG" ] && ViashError Bad arguments for option \'--sortmerna_log=*\': \'$VIASH_PAR_SORTMERNA_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORTMERNA_LOG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_alignment) + [ -n "$VIASH_PAR_STAR_ALIGNMENT" ] && ViashError Bad arguments for option \'--star_alignment\': \'$VIASH_PAR_STAR_ALIGNMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_ALIGNMENT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_alignment. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_alignment=*) + [ -n "$VIASH_PAR_STAR_ALIGNMENT" ] && ViashError Bad arguments for option \'--star_alignment=*\': \'$VIASH_PAR_STAR_ALIGNMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_ALIGNMENT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_sorted) + [ -n "$VIASH_PAR_GENOME_BAM_SORTED" ] && ViashError Bad arguments for option \'--genome_bam_sorted\': \'$VIASH_PAR_GENOME_BAM_SORTED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_SORTED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_sorted. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_sorted=*) + [ -n "$VIASH_PAR_GENOME_BAM_SORTED" ] && ViashError Bad arguments for option \'--genome_bam_sorted=*\': \'$VIASH_PAR_GENOME_BAM_SORTED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_SORTED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_index) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_index=*) + [ -n "$VIASH_PAR_GENOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--genome_bam_index=*\': \'$VIASH_PAR_GENOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && ViashError Bad arguments for option \'--transcriptome_bam\': \'$VIASH_PAR_TRANSCRIPTOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && ViashError Bad arguments for option \'--transcriptome_bam=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_index) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--transcriptome_bam_index\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_index. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_index=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && ViashError Bad arguments for option \'--transcriptome_bam_index=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --star_log) + [ -n "$VIASH_PAR_STAR_LOG" ] && ViashError Bad arguments for option \'--star_log\': \'$VIASH_PAR_STAR_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_LOG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --star_log. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --star_log=*) + [ -n "$VIASH_PAR_STAR_LOG" ] && ViashError Bad arguments for option \'--star_log=*\': \'$VIASH_PAR_STAR_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STAR_LOG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_stats) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_stats=*) + [ -n "$VIASH_PAR_GENOME_BAM_STATS" ] && ViashError Bad arguments for option \'--genome_bam_stats=*\': \'$VIASH_PAR_GENOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_flagstat) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_flagstat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_flagstat=*) + [ -n "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--genome_bam_flagstat=*\': \'$VIASH_PAR_GENOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_FLAGSTAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genome_bam_idxstats) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_bam_idxstats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_bam_idxstats=*) + [ -n "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--genome_bam_idxstats=*\': \'$VIASH_PAR_GENOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_BAM_IDXSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_stats) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_stats\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_stats=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_stats=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_flagstat) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--transcriptome_bam_flagstat\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_flagstat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_flagstat=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && ViashError Bad arguments for option \'--transcriptome_bam_flagstat=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_bam_idxstats) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_idxstats\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_bam_idxstats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_bam_idxstats=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && ViashError Bad arguments for option \'--transcriptome_bam_idxstats=*\': \'$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_quant_results) + [ -n "$VIASH_PAR_SALMON_QUANT_RESULTS" ] && ViashError Bad arguments for option \'--salmon_quant_results\': \'$VIASH_PAR_SALMON_QUANT_RESULTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_RESULTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_results. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_quant_results=*) + [ -n "$VIASH_PAR_SALMON_QUANT_RESULTS" ] && ViashError Bad arguments for option \'--salmon_quant_results=*\': \'$VIASH_PAR_SALMON_QUANT_RESULTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_RESULTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --salmon_quant_results_file) + [ -n "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--salmon_quant_results_file\': \'$VIASH_PAR_SALMON_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_RESULTS_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_results_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --salmon_quant_results_file=*) + [ -n "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--salmon_quant_results_file=*\': \'$VIASH_PAR_SALMON_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SALMON_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_quant_results) + [ -n "$VIASH_PAR_PSEUDO_QUANT_RESULTS" ] && ViashError Bad arguments for option \'--pseudo_quant_results\': \'$VIASH_PAR_PSEUDO_QUANT_RESULTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_RESULTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_quant_results. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_quant_results=*) + [ -n "$VIASH_PAR_PSEUDO_QUANT_RESULTS" ] && ViashError Bad arguments for option \'--pseudo_quant_results=*\': \'$VIASH_PAR_PSEUDO_QUANT_RESULTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_RESULTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_counts_gene) + [ -n "$VIASH_PAR_RSEM_COUNTS_GENE" ] && ViashError Bad arguments for option \'--rsem_counts_gene\': \'$VIASH_PAR_RSEM_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_counts_gene=*) + [ -n "$VIASH_PAR_RSEM_COUNTS_GENE" ] && ViashError Bad arguments for option \'--rsem_counts_gene=*\': \'$VIASH_PAR_RSEM_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --rsem_counts_transcripts) + [ -n "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--rsem_counts_transcripts\': \'$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --rsem_counts_transcripts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --rsem_counts_transcripts=*) + [ -n "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--rsem_counts_transcripts=*\': \'$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_star_rsem) + [ -n "$VIASH_PAR_BAM_STAR_RSEM" ] && ViashError Bad arguments for option \'--bam_star_rsem\': \'$VIASH_PAR_BAM_STAR_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_STAR_RSEM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_star_rsem. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_star_rsem=*) + [ -n "$VIASH_PAR_BAM_STAR_RSEM" ] && ViashError Bad arguments for option \'--bam_star_rsem=*\': \'$VIASH_PAR_BAM_STAR_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_STAR_RSEM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_genome_rsem) + [ -n "$VIASH_PAR_BAM_GENOME_RSEM" ] && ViashError Bad arguments for option \'--bam_genome_rsem\': \'$VIASH_PAR_BAM_GENOME_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_GENOME_RSEM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_genome_rsem. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_genome_rsem=*) + [ -n "$VIASH_PAR_BAM_GENOME_RSEM" ] && ViashError Bad arguments for option \'--bam_genome_rsem=*\': \'$VIASH_PAR_BAM_GENOME_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_GENOME_RSEM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam_transcript_rsem) + [ -n "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && ViashError Bad arguments for option \'--bam_transcript_rsem\': \'$VIASH_PAR_BAM_TRANSCRIPT_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_TRANSCRIPT_RSEM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_transcript_rsem. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam_transcript_rsem=*) + [ -n "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && ViashError Bad arguments for option \'--bam_transcript_rsem=*\': \'$VIASH_PAR_BAM_TRANSCRIPT_RSEM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM_TRANSCRIPT_RSEM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_gene) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_gene=*) + [ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene=*\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene=*) + [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_length_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_length_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_gene_scaled) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_gene_scaled=*) + [ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tpm_transcript) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tpm_transcript=*) + [ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript=*\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_transcript) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --counts_transcript=*) + [ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quant_merged_summarizedexperiment) + [ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_merged_summarizedexperiment. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quant_merged_summarizedexperiment=*) + [ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment=*\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --markduplicates_metrics) + [ -n "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && ViashError Bad arguments for option \'--markduplicates_metrics\': \'$VIASH_PAR_MARKDUPLICATES_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MARKDUPLICATES_METRICS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --markduplicates_metrics. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --markduplicates_metrics=*) + [ -n "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && ViashError Bad arguments for option \'--markduplicates_metrics=*\': \'$VIASH_PAR_MARKDUPLICATES_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MARKDUPLICATES_METRICS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_transcript_gtf) + [ -n "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && ViashError Bad arguments for option \'--stringtie_transcript_gtf\': \'$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_transcript_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_transcript_gtf=*) + [ -n "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && ViashError Bad arguments for option \'--stringtie_transcript_gtf=*\': \'$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_coverage_gtf) + [ -n "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && ViashError Bad arguments for option \'--stringtie_coverage_gtf\': \'$VIASH_PAR_STRINGTIE_COVERAGE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_COVERAGE_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_coverage_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_coverage_gtf=*) + [ -n "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && ViashError Bad arguments for option \'--stringtie_coverage_gtf=*\': \'$VIASH_PAR_STRINGTIE_COVERAGE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_COVERAGE_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_abundance) + [ -n "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && ViashError Bad arguments for option \'--stringtie_abundance\': \'$VIASH_PAR_STRINGTIE_ABUNDANCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_ABUNDANCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_abundance. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_abundance=*) + [ -n "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && ViashError Bad arguments for option \'--stringtie_abundance=*\': \'$VIASH_PAR_STRINGTIE_ABUNDANCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_ABUNDANCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stringtie_ballgown) + [ -n "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && ViashError Bad arguments for option \'--stringtie_ballgown\': \'$VIASH_PAR_STRINGTIE_BALLGOWN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_BALLGOWN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringtie_ballgown. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stringtie_ballgown=*) + [ -n "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && ViashError Bad arguments for option \'--stringtie_ballgown=*\': \'$VIASH_PAR_STRINGTIE_BALLGOWN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRINGTIE_BALLGOWN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts) + [ -n "$VIASH_PAR_FEATURECOUNTS" ] && ViashError Bad arguments for option \'--featurecounts\': \'$VIASH_PAR_FEATURECOUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts=*) + [ -n "$VIASH_PAR_FEATURECOUNTS" ] && ViashError Bad arguments for option \'--featurecounts=*\': \'$VIASH_PAR_FEATURECOUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_summary) + [ -n "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ] && ViashError Bad arguments for option \'--featurecounts_summary\': \'$VIASH_PAR_FEATURECOUNTS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_summary=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ] && ViashError Bad arguments for option \'--featurecounts_summary=*\': \'$VIASH_PAR_FEATURECOUNTS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_multiqc) + [ -n "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_multiqc\': \'$VIASH_PAR_FEATURECOUNTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_multiqc=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_multiqc=*\': \'$VIASH_PAR_FEATURECOUNTS_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --featurecounts_rrna_multiqc) + [ -n "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_rrna_multiqc\': \'$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --featurecounts_rrna_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --featurecounts_rrna_multiqc=*) + [ -n "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && ViashError Bad arguments for option \'--featurecounts_rrna_multiqc=*\': \'$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bedgraph_forward) + [ -n "$VIASH_PAR_BEDGRAPH_FORWARD" ] && ViashError Bad arguments for option \'--bedgraph_forward\': \'$VIASH_PAR_BEDGRAPH_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_FORWARD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bedgraph_forward. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bedgraph_forward=*) + [ -n "$VIASH_PAR_BEDGRAPH_FORWARD" ] && ViashError Bad arguments for option \'--bedgraph_forward=*\': \'$VIASH_PAR_BEDGRAPH_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_FORWARD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bedgraph_reverse) + [ -n "$VIASH_PAR_BEDGRAPH_REVERSE" ] && ViashError Bad arguments for option \'--bedgraph_reverse\': \'$VIASH_PAR_BEDGRAPH_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_REVERSE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bedgraph_reverse. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bedgraph_reverse=*) + [ -n "$VIASH_PAR_BEDGRAPH_REVERSE" ] && ViashError Bad arguments for option \'--bedgraph_reverse=*\': \'$VIASH_PAR_BEDGRAPH_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BEDGRAPH_REVERSE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bigwig_forward) + [ -n "$VIASH_PAR_BIGWIG_FORWARD" ] && ViashError Bad arguments for option \'--bigwig_forward\': \'$VIASH_PAR_BIGWIG_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_FORWARD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bigwig_forward. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bigwig_forward=*) + [ -n "$VIASH_PAR_BIGWIG_FORWARD" ] && ViashError Bad arguments for option \'--bigwig_forward=*\': \'$VIASH_PAR_BIGWIG_FORWARD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_FORWARD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bigwig_reverse) + [ -n "$VIASH_PAR_BIGWIG_REVERSE" ] && ViashError Bad arguments for option \'--bigwig_reverse\': \'$VIASH_PAR_BIGWIG_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_REVERSE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bigwig_reverse. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bigwig_reverse=*) + [ -n "$VIASH_PAR_BIGWIG_REVERSE" ] && ViashError Bad arguments for option \'--bigwig_reverse=*\': \'$VIASH_PAR_BIGWIG_REVERSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BIGWIG_REVERSE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --preseq_output) + [ -n "$VIASH_PAR_PRESEQ_OUTPUT" ] && ViashError Bad arguments for option \'--preseq_output\': \'$VIASH_PAR_PRESEQ_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PRESEQ_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --preseq_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --preseq_output=*) + [ -n "$VIASH_PAR_PRESEQ_OUTPUT" ] && ViashError Bad arguments for option \'--preseq_output=*\': \'$VIASH_PAR_PRESEQ_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PRESEQ_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamstat_output) + [ -n "$VIASH_PAR_BAMSTAT_OUTPUT" ] && ViashError Bad arguments for option \'--bamstat_output\': \'$VIASH_PAR_BAMSTAT_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMSTAT_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamstat_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bamstat_output=*) + [ -n "$VIASH_PAR_BAMSTAT_OUTPUT" ] && ViashError Bad arguments for option \'--bamstat_output=*\': \'$VIASH_PAR_BAMSTAT_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMSTAT_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --strandedness_output) + [ -n "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && ViashError Bad arguments for option \'--strandedness_output\': \'$VIASH_PAR_STRANDEDNESS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strandedness_output=*) + [ -n "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && ViashError Bad arguments for option \'--strandedness_output=*\': \'$VIASH_PAR_STRANDEDNESS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDEDNESS_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_stats) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--inner_dist_output_stats\': \'$VIASH_PAR_INNER_DIST_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_STATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_stats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_stats=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--inner_dist_output_stats=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_STATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_dist) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_DIST" ] && ViashError Bad arguments for option \'--inner_dist_output_dist\': \'$VIASH_PAR_INNER_DIST_OUTPUT_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_DIST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_dist. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_dist=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_DIST" ] && ViashError Bad arguments for option \'--inner_dist_output_dist=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_DIST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_freq) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ" ] && ViashError Bad arguments for option \'--inner_dist_output_freq\': \'$VIASH_PAR_INNER_DIST_OUTPUT_FREQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_FREQ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_freq. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_freq=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ" ] && ViashError Bad arguments for option \'--inner_dist_output_freq=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_FREQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_FREQ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_plot) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--inner_dist_output_plot\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_plot=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--inner_dist_output_plot=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --inner_dist_output_plot_r) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--inner_dist_output_plot_r\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --inner_dist_output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --inner_dist_output_plot_r=*) + [ -n "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--inner_dist_output_plot_r=*\': \'$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_log) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && ViashError Bad arguments for option \'--junction_annotation_output_log\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_log. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_log=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && ViashError Bad arguments for option \'--junction_annotation_output_log=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_plot_r) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_annotation_output_plot_r\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_plot_r=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_annotation_output_plot_r=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_junction_bed) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_bed\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_junction_bed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_junction_bed=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_bed=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_junction_interact) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_interact\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_junction_interact. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_junction_interact=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_interact=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_junction_sheet) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_sheet\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_junction_sheet. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_junction_sheet=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && ViashError Bad arguments for option \'--junction_annotation_output_junction_sheet=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_splice_events_plot) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_events_plot\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_splice_events_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_splice_events_plot=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_events_plot=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_annotation_output_splice_junctions_plot) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_junctions_plot\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_annotation_output_splice_junctions_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_annotation_output_splice_junctions_plot=*) + [ -n "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && ViashError Bad arguments for option \'--junction_annotation_output_splice_junctions_plot=*\': \'$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_saturation_output_plot_r) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot_r\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_saturation_output_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_saturation_output_plot_r=*) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot_r=*\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --junction_saturation_output_plot) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --junction_saturation_output_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --junction_saturation_output_plot=*) + [ -n "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--junction_saturation_output_plot=*\': \'$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_distribution_output) + [ -n "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && ViashError Bad arguments for option \'--read_distribution_output\': \'$VIASH_PAR_READ_DISTRIBUTION_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DISTRIBUTION_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_distribution_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_distribution_output=*) + [ -n "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && ViashError Bad arguments for option \'--read_distribution_output=*\': \'$VIASH_PAR_READ_DISTRIBUTION_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DISTRIBUTION_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_plot_r) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot_r\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_plot_r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_plot_r=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot_r=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_plot) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_plot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_plot=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_plot=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_mapping) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_mapping\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_mapping. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_mapping=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_mapping=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --read_duplication_output_duplication_rate_sequence) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_sequence\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --read_duplication_output_duplication_rate_sequence. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --read_duplication_output_duplication_rate_sequence=*) + [ -n "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && ViashError Bad arguments for option \'--read_duplication_output_duplication_rate_sequence=*\': \'$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tin_output_summary) + [ -n "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && ViashError Bad arguments for option \'--tin_output_summary\': \'$VIASH_PAR_TIN_OUTPUT_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tin_output_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tin_output_summary=*) + [ -n "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && ViashError Bad arguments for option \'--tin_output_summary=*\': \'$VIASH_PAR_TIN_OUTPUT_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tin_output_metrics) + [ -n "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && ViashError Bad arguments for option \'--tin_output_metrics\': \'$VIASH_PAR_TIN_OUTPUT_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_METRICS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tin_output_metrics. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tin_output_metrics=*) + [ -n "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && ViashError Bad arguments for option \'--tin_output_metrics=*\': \'$VIASH_PAR_TIN_OUTPUT_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIN_OUTPUT_METRICS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_dupmatrix) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && ViashError Bad arguments for option \'--dupradar_output_dupmatrix\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_dupmatrix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_dupmatrix=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && ViashError Bad arguments for option \'--dupradar_output_dupmatrix=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_dup_intercept_mqc) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_dup_intercept_mqc\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_dup_intercept_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_dup_intercept_mqc=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_dup_intercept_mqc=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_duprate_exp_boxplot) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_boxplot\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_duprate_exp_boxplot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_duprate_exp_boxplot=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_boxplot=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_duprate_exp_densplot) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_densplot\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_duprate_exp_densplot. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_duprate_exp_densplot=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_densplot=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_duprate_exp_denscurve_mqc) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_denscurve_mqc\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_duprate_exp_denscurve_mqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_duprate_exp_denscurve_mqc=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && ViashError Bad arguments for option \'--dupradar_output_duprate_exp_denscurve_mqc=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_expression_histogram) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && ViashError Bad arguments for option \'--dupradar_output_expression_histogram\': \'$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_expression_histogram. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_expression_histogram=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && ViashError Bad arguments for option \'--dupradar_output_expression_histogram=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dupradar_output_intercept_slope) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && ViashError Bad arguments for option \'--dupradar_output_intercept_slope\': \'$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dupradar_output_intercept_slope. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dupradar_output_intercept_slope=*) + [ -n "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && ViashError Bad arguments for option \'--dupradar_output_intercept_slope=*\': \'$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_qc_report) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_qc_report. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_qc_report=*) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report=*\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_counts) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_counts=*) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts=*\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_report) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_report. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_report=*) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report=*\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_REPORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --deseq2_output) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT" ] && ViashError Bad arguments for option \'--deseq2_output\': \'$VIASH_PAR_DESEQ2_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --deseq2_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --deseq2_output=*) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT" ] && ViashError Bad arguments for option \'--deseq2_output=*\': \'$VIASH_PAR_DESEQ2_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --deseq2_output_pseudo) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && ViashError Bad arguments for option \'--deseq2_output_pseudo\': \'$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT_PSEUDO="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --deseq2_output_pseudo. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --deseq2_output_pseudo=*) + [ -n "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && ViashError Bad arguments for option \'--deseq2_output_pseudo=*\': \'$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DESEQ2_OUTPUT_PSEUDO=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_report) + [ -n "$VIASH_PAR_MULTIQC_REPORT" ] && ViashError Bad arguments for option \'--multiqc_report\': \'$VIASH_PAR_MULTIQC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_report. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_report=*) + [ -n "$VIASH_PAR_MULTIQC_REPORT" ] && ViashError Bad arguments for option \'--multiqc_report=*\': \'$VIASH_PAR_MULTIQC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_REPORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_data) + [ -n "$VIASH_PAR_MULTIQC_DATA" ] && ViashError Bad arguments for option \'--multiqc_data\': \'$VIASH_PAR_MULTIQC_DATA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_DATA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_data. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_data=*) + [ -n "$VIASH_PAR_MULTIQC_DATA" ] && ViashError Bad arguments for option \'--multiqc_data=*\': \'$VIASH_PAR_MULTIQC_DATA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_DATA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_plots) + [ -n "$VIASH_PAR_MULTIQC_PLOTS" ] && ViashError Bad arguments for option \'--multiqc_plots\': \'$VIASH_PAR_MULTIQC_PLOTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_PLOTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_plots. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_plots=*) + [ -n "$VIASH_PAR_MULTIQC_PLOTS" ] && ViashError Bad arguments for option \'--multiqc_plots=*\': \'$VIASH_PAR_MULTIQC_PLOTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_PLOTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --multiqc_versions) + [ -n "$VIASH_PAR_MULTIQC_VERSIONS" ] && ViashError Bad arguments for option \'--multiqc_versions\': \'$VIASH_PAR_MULTIQC_VERSIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_VERSIONS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --multiqc_versions. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --multiqc_versions=*) + [ -n "$VIASH_PAR_MULTIQC_VERSIONS" ] && ViashError Bad arguments for option \'--multiqc_versions=*\': \'$VIASH_PAR_MULTIQC_VERSIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MULTIQC_VERSIONS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_gene) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && ViashError Bad arguments for option \'--pseudo_counts_gene\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_gene=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && ViashError Bad arguments for option \'--pseudo_counts_gene=*\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_gene_length_scaled) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_length_scaled\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_gene_length_scaled=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_length_scaled=*\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_gene_scaled) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_scaled\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_gene_scaled=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--pseudo_counts_gene_scaled=*\': \'$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_tpm_gene) + [ -n "$VIASH_PAR_PSEUDO_TPM_GENE" ] && ViashError Bad arguments for option \'--pseudo_tpm_gene\': \'$VIASH_PAR_PSEUDO_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_tpm_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_tpm_gene=*) + [ -n "$VIASH_PAR_PSEUDO_TPM_GENE" ] && ViashError Bad arguments for option \'--pseudo_tpm_gene=*\': \'$VIASH_PAR_PSEUDO_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_tpm_transcript) + [ -n "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_tpm_transcript\': \'$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_tpm_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_tpm_transcript=*) + [ -n "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_tpm_transcript=*\': \'$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_counts_transcript) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_counts_transcript\': \'$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_counts_transcript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_counts_transcript=*) + [ -n "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--pseudo_counts_transcript=*\': \'$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pseudo_quant_merged_summarizedexperiment) + [ -n "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--pseudo_quant_merged_summarizedexperiment\': \'$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_quant_merged_summarizedexperiment. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pseudo_quant_merged_summarizedexperiment=*) + [ -n "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--pseudo_quant_merged_summarizedexperiment=*\': \'$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native." + exit 1 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_ID+x} ]; then + ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then + ViashError '--fastq_1' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_FASTA+x} ]; then + ViashError '--fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_STRANDEDNESS+x} ]; then + VIASH_PAR_STRANDEDNESS="auto" +fi +if [ -z ${VIASH_PAR_GENCODE+x} ]; then + VIASH_PAR_GENCODE="false" +fi +if [ -z ${VIASH_PAR_GTF_EXTRA_ATTRIBUTES+x} ]; then + VIASH_PAR_GTF_EXTRA_ATTRIBUTES="gene_name" +fi +if [ -z ${VIASH_PAR_GTF_GROUP_FEATURES+x} ]; then + VIASH_PAR_GTF_GROUP_FEATURES="gene_id" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_GROUP_TYPE+x} ]; then + VIASH_PAR_FEATURECOUNTS_GROUP_TYPE="gene_biotype" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE+x} ]; then + VIASH_PAR_FEATURECOUNTS_FEATURE_TYPE="exon" +fi +if [ -z ${VIASH_PAR_TRIMMER+x} ]; then + VIASH_PAR_TRIMMER="trimgalore" +fi +if [ -z ${VIASH_PAR_MIN_TRIMMED_READS+x} ]; then + VIASH_PAR_MIN_TRIMMED_READS="10000" +fi +if [ -z ${VIASH_PAR_REMOVE_RIBO_RNA+x} ]; then + VIASH_PAR_REMOVE_RIBO_RNA="false" +fi +if [ -z ${VIASH_PAR_RIBO_DATABASE_MANIFEST+x} ]; then + VIASH_PAR_RIBO_DATABASE_MANIFEST="src/assets/rrna-db-defaults.txt" +fi +if [ -z ${VIASH_PAR_WITH_UMI+x} ]; then + VIASH_PAR_WITH_UMI="false" +fi +if [ -z ${VIASH_PAR_UMITOOLS_EXTRACT_METHOD+x} ]; then + VIASH_PAR_UMITOOLS_EXTRACT_METHOD="string" +fi +if [ -z ${VIASH_PAR_UMI_DISCARD_READ+x} ]; then + VIASH_PAR_UMI_DISCARD_READ="0" +fi +if [ -z ${VIASH_PAR_UMITOOLS_UMI_SEPARATOR+x} ]; then + VIASH_PAR_UMITOOLS_UMI_SEPARATOR="_" +fi +if [ -z ${VIASH_PAR_UMITOOLS_GROUPING_METHOD+x} ]; then + VIASH_PAR_UMITOOLS_GROUPING_METHOD="directional" +fi +if [ -z ${VIASH_PAR_UMI_DEDUP_STATS+x} ]; then + VIASH_PAR_UMI_DEDUP_STATS="false" +fi +if [ -z ${VIASH_PAR_ALIGNER+x} ]; then + VIASH_PAR_ALIGNER="star_salmon" +fi +if [ -z ${VIASH_PAR_PSEUDO_ALIGNER+x} ]; then + VIASH_PAR_PSEUDO_ALIGNER="salmon" +fi +if [ -z ${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE+x} ]; then + VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE="31" +fi +if [ -z ${VIASH_PAR_BAM_CSI_INDEX+x} ]; then + VIASH_PAR_BAM_CSI_INDEX="false" +fi +if [ -z ${VIASH_PAR_MIN_MAPPED_READS+x} ]; then + VIASH_PAR_MIN_MAPPED_READS="5" +fi +if [ -z ${VIASH_PAR_STRINGTIE_IGNORE_GTF+x} ]; then + VIASH_PAR_STRINGTIE_IGNORE_GTF="false" +fi +if [ -z ${VIASH_PAR_EXTRA_STRINGTIE_ARGS+x} ]; then + VIASH_PAR_EXTRA_STRINGTIE_ARGS="-v" +fi +if [ -z ${VIASH_PAR_SAVE_UNALIGNED+x} ]; then + VIASH_PAR_SAVE_UNALIGNED="false" +fi +if [ -z ${VIASH_PAR_SAVE_ALIGN_INTERMEDS+x} ]; then + VIASH_PAR_SAVE_ALIGN_INTERMEDS="false" +fi +if [ -z ${VIASH_PAR_SKIP_ALIGNMENT+x} ]; then + VIASH_PAR_SKIP_ALIGNMENT="false" +fi +if [ -z ${VIASH_PAR_SKIP_PSEUDO_ALIGNMENT+x} ]; then + VIASH_PAR_SKIP_PSEUDO_ALIGNMENT="false" +fi +if [ -z ${VIASH_PAR_SKIP_FASTQC+x} ]; then + VIASH_PAR_SKIP_FASTQC="false" +fi +if [ -z ${VIASH_PAR_SKIP_TRIMMING+x} ]; then + VIASH_PAR_SKIP_TRIMMING="false" +fi +if [ -z ${VIASH_PAR_SKIP_UMI_EXTRACT+x} ]; then + VIASH_PAR_SKIP_UMI_EXTRACT="false" +fi +if [ -z ${VIASH_PAR_SKIP_QC+x} ]; then + VIASH_PAR_SKIP_QC="false" +fi +if [ -z ${VIASH_PAR_SKIP_MARKDUPLICATES+x} ]; then + VIASH_PAR_SKIP_MARKDUPLICATES="false" +fi +if [ -z ${VIASH_PAR_SKIP_STRINGTIE+x} ]; then + VIASH_PAR_SKIP_STRINGTIE="false" +fi +if [ -z ${VIASH_PAR_SKIP_BIOTYPE_QC+x} ]; then + VIASH_PAR_SKIP_BIOTYPE_QC="false" +fi +if [ -z ${VIASH_PAR_SKIP_BIGWIG+x} ]; then + VIASH_PAR_SKIP_BIGWIG="false" +fi +if [ -z ${VIASH_PAR_SKIP_PRESEQ+x} ]; then + VIASH_PAR_SKIP_PRESEQ="false" +fi +if [ -z ${VIASH_PAR_SKIP_DUPRADAR+x} ]; then + VIASH_PAR_SKIP_DUPRADAR="false" +fi +if [ -z ${VIASH_PAR_SKIP_QUALIMAP+x} ]; then + VIASH_PAR_SKIP_QUALIMAP="false" +fi +if [ -z ${VIASH_PAR_SKIP_RSEQC+x} ]; then + VIASH_PAR_SKIP_RSEQC="false" +fi +if [ -z ${VIASH_PAR_SKIP_MULTIQC+x} ]; then + VIASH_PAR_SKIP_MULTIQC="false" +fi +if [ -z ${VIASH_PAR_EXTRA_PICARD_ARGS+x} ]; then + VIASH_PAR_EXTRA_PICARD_ARGS=" --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" +fi +if [ -z ${VIASH_PAR_EXTRA_PRESEQ_ARGS+x} ]; then + VIASH_PAR_EXTRA_PRESEQ_ARGS="-verbose -seed 1 -seg_len 100000000" +fi +if [ -z ${VIASH_PAR_DESEQ2_VST+x} ]; then + VIASH_PAR_DESEQ2_VST="true" +fi +if [ -z ${VIASH_PAR_RSEQC_MODULES+x} ]; then + VIASH_PAR_RSEQC_MODULES="bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" +fi +if [ -z ${VIASH_PAR_OUTPUT_FASTA+x} ]; then + VIASH_PAR_OUTPUT_FASTA="reference/genome.fasta" +fi +if [ -z ${VIASH_PAR_OUTPUT_GTF+x} ]; then + VIASH_PAR_OUTPUT_GTF="reference/gene_annotation.gtf" +fi +if [ -z ${VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA+x} ]; then + VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA="reference/transcriptome.fasta" +fi +if [ -z ${VIASH_PAR_OUTPUT_GENE_BED+x} ]; then + VIASH_PAR_OUTPUT_GENE_BED="reference/gene_annotation.bed" +fi +if [ -z ${VIASH_PAR_OUTPUT_STAR_INDEX+x} ]; then + VIASH_PAR_OUTPUT_STAR_INDEX="reference/index/STAR" +fi +if [ -z ${VIASH_PAR_OUTPUT_SALMON_INDEX+x} ]; then + VIASH_PAR_OUTPUT_SALMON_INDEX="reference/index/Salmon" +fi +if [ -z ${VIASH_PAR_OUTPUT_BBSPLIT_INDEX+x} ]; then + VIASH_PAR_OUTPUT_BBSPLIT_INDEX="reference/index/BBSplit" +fi +if [ -z ${VIASH_PAR_OUTPUT_KALLISTO_INDEX+x} ]; then + VIASH_PAR_OUTPUT_KALLISTO_INDEX="reference/index/Kallisto" +fi +if [ -z ${VIASH_PAR_OUTPUT_FASTQ_1+x} ]; then + VIASH_PAR_OUTPUT_FASTQ_1="fastq/\${id}_r1.fastq.gz" +fi +if [ -z ${VIASH_PAR_OUTPUT_FASTQ_2+x} ]; then + VIASH_PAR_OUTPUT_FASTQ_2="fastq/\${id}_r2.fastq.gz" +fi +if [ -z ${VIASH_PAR_FASTQC_HTML_1+x} ]; then + VIASH_PAR_FASTQC_HTML_1="fastqc_raw/\${id}_r1.fastqc.html" +fi +if [ -z ${VIASH_PAR_FASTQC_HTML_2+x} ]; then + VIASH_PAR_FASTQC_HTML_2="fastqc_raw/\${id}_r2.fastqc.html" +fi +if [ -z ${VIASH_PAR_FASTQC_ZIP_1+x} ]; then + VIASH_PAR_FASTQC_ZIP_1="fastqc_raw/\${id}_r1.fastqc.zip" +fi +if [ -z ${VIASH_PAR_FASTQC_ZIP_2+x} ]; then + VIASH_PAR_FASTQC_ZIP_2="fastqc_raw/\${id}_r2.fastqc.zip" +fi +if [ -z ${VIASH_PAR_TRIM_HTML_1+x} ]; then + VIASH_PAR_TRIM_HTML_1="fastqc_trim/\${id}_r1.trimmed_fastqc.html" +fi +if [ -z ${VIASH_PAR_TRIM_HTML_2+x} ]; then + VIASH_PAR_TRIM_HTML_2="fastqc_trim/\${id}_r2.trimmed_fastqc.html" +fi +if [ -z ${VIASH_PAR_TRIM_ZIP_1+x} ]; then + VIASH_PAR_TRIM_ZIP_1="fastqc_trim/\${id}_r1.trimmed_fastqc.zip" +fi +if [ -z ${VIASH_PAR_TRIM_ZIP_2+x} ]; then + VIASH_PAR_TRIM_ZIP_2="fastqc_trim/\${id}_r2.trimmed_fastqc.zip" +fi +if [ -z ${VIASH_PAR_TRIM_LOG_1+x} ]; then + VIASH_PAR_TRIM_LOG_1="trimgalore/\${id}_r1.trimming_report.txt" +fi +if [ -z ${VIASH_PAR_TRIM_LOG_2+x} ]; then + VIASH_PAR_TRIM_LOG_2="trimgalore/\${id}_r2.trimming_report.txt" +fi +if [ -z ${VIASH_PAR_FASTP_TRIM_JSON+x} ]; then + VIASH_PAR_FASTP_TRIM_JSON="fastp/\$id_out.json" +fi +if [ -z ${VIASH_PAR_FASTP_TRIM_HTML+x} ]; then + VIASH_PAR_FASTP_TRIM_HTML="fastp/\$id_out.html" +fi +if [ -z ${VIASH_PAR_SORTMERNA_LOG+x} ]; then + VIASH_PAR_SORTMERNA_LOG="sortmerna/\$id.log" +fi +if [ -z ${VIASH_PAR_STAR_ALIGNMENT+x} ]; then + VIASH_PAR_STAR_ALIGNMENT="STAR/\$id" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_SORTED+x} ]; then + VIASH_PAR_GENOME_BAM_SORTED="STAR/genome_processed/\$id.genome.bam" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_INDEX+x} ]; then + VIASH_PAR_GENOME_BAM_INDEX="STAR/genome_processed/\$id.genome.bam.bai" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM="STAR/transcriptome_processed/\$id.transcriptome.bam" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_INDEX+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_INDEX="STAR/transcriptome_processed/\$id.transcriptome.bam.bai" +fi +if [ -z ${VIASH_PAR_STAR_LOG+x} ]; then + VIASH_PAR_STAR_LOG="STAR/log/\$id.log" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_STATS+x} ]; then + VIASH_PAR_GENOME_BAM_STATS="samtools_stats/\$id.genome.stats" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_FLAGSTAT+x} ]; then + VIASH_PAR_GENOME_BAM_FLAGSTAT="samtools_stats/\$id.genome.flagstat" +fi +if [ -z ${VIASH_PAR_GENOME_BAM_IDXSTATS+x} ]; then + VIASH_PAR_GENOME_BAM_IDXSTATS="samtools_stats/\$id.genome.idxstats" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_STATS+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_STATS="samtools_stats/\$id.transcriptome.stats" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT="samtools_stats/\$id.transcriptome.flagstat" +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS+x} ]; then + VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS="samtools_stats/\$id.transcriptome.idxstats" +fi +if [ -z ${VIASH_PAR_SALMON_QUANT_RESULTS+x} ]; then + VIASH_PAR_SALMON_QUANT_RESULTS="STAR_Salmon/\$id" +fi +if [ -z ${VIASH_PAR_SALMON_QUANT_RESULTS_FILE+x} ]; then + VIASH_PAR_SALMON_QUANT_RESULTS_FILE="STAR_Salmon/\$id/quant.sf" +fi +if [ -z ${VIASH_PAR_PSEUDO_QUANT_RESULTS+x} ]; then + VIASH_PAR_PSEUDO_QUANT_RESULTS="Pseudo_align_quant/\$id" +fi +if [ -z ${VIASH_PAR_RSEM_COUNTS_GENE+x} ]; then + VIASH_PAR_RSEM_COUNTS_GENE="RSEM/\$id.genes.results" +fi +if [ -z ${VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS+x} ]; then + VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS="RSEM/\$id.isoforms.results" +fi +if [ -z ${VIASH_PAR_BAM_STAR_RSEM+x} ]; then + VIASH_PAR_BAM_STAR_RSEM="RSEM/\$id.STAR.genome.bam" +fi +if [ -z ${VIASH_PAR_BAM_GENOME_RSEM+x} ]; then + VIASH_PAR_BAM_GENOME_RSEM="RSEM/\$id.genome.bam" +fi +if [ -z ${VIASH_PAR_BAM_TRANSCRIPT_RSEM+x} ]; then + VIASH_PAR_BAM_TRANSCRIPT_RSEM="RSEM/\$id.transcript.bam" +fi +if [ -z ${VIASH_PAR_TPM_GENE+x} ]; then + VIASH_PAR_TPM_GENE="transcript_quantification/gene_tpm.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE+x} ]; then + VIASH_PAR_COUNTS_GENE="transcript_quantification/gene_counts.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED+x} ]; then + VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="transcript_quantification/gene_counts_length_scaled.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_GENE_SCALED+x} ]; then + VIASH_PAR_COUNTS_GENE_SCALED="transcript_quantification/gene_counts_scaled.tsv" +fi +if [ -z ${VIASH_PAR_TPM_TRANSCRIPT+x} ]; then + VIASH_PAR_TPM_TRANSCRIPT="transcript_quantification/transcript_tpm.tsv" +fi +if [ -z ${VIASH_PAR_COUNTS_TRANSCRIPT+x} ]; then + VIASH_PAR_COUNTS_TRANSCRIPT="transcript_quantification/transcript_counts.tsv" +fi +if [ -z ${VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT+x} ]; then + VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT="transcript_quantification/summarizedexperiment" +fi +if [ -z ${VIASH_PAR_MARKDUPLICATES_METRICS+x} ]; then + VIASH_PAR_MARKDUPLICATES_METRICS="picard/\$id.MarkDuplicates.metrics.txt" +fi +if [ -z ${VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF+x} ]; then + VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF="stringtie/\$id.transcripts.gtf" +fi +if [ -z ${VIASH_PAR_STRINGTIE_COVERAGE_GTF+x} ]; then + VIASH_PAR_STRINGTIE_COVERAGE_GTF="stringtie/\$id.coverage.gtf" +fi +if [ -z ${VIASH_PAR_STRINGTIE_ABUNDANCE+x} ]; then + VIASH_PAR_STRINGTIE_ABUNDANCE="stringtie/\$id.gene_abundance.txt" +fi +if [ -z ${VIASH_PAR_STRINGTIE_BALLGOWN+x} ]; then + VIASH_PAR_STRINGTIE_BALLGOWN="stringtie/\$id.ballgown" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS+x} ]; then + VIASH_PAR_FEATURECOUNTS="featurecounts/\$id.featureCounts.txt" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_SUMMARY+x} ]; then + VIASH_PAR_FEATURECOUNTS_SUMMARY="featurecounts/\$id.featureCounts.txt.summary" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_MULTIQC+x} ]; then + VIASH_PAR_FEATURECOUNTS_MULTIQC="featurecounts/\$id.featureCounts_mqc.tsv" +fi +if [ -z ${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC+x} ]; then + VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC="featurecounts/\$id.featureCounts_rrna_mqc.tsv" +fi +if [ -z ${VIASH_PAR_BEDGRAPH_FORWARD+x} ]; then + VIASH_PAR_BEDGRAPH_FORWARD="bedgraph/\$id.forward.bedgraph" +fi +if [ -z ${VIASH_PAR_BEDGRAPH_REVERSE+x} ]; then + VIASH_PAR_BEDGRAPH_REVERSE="bedgraph/\$id.reverse.bedgraph" +fi +if [ -z ${VIASH_PAR_BIGWIG_FORWARD+x} ]; then + VIASH_PAR_BIGWIG_FORWARD="bigwig/\$id.forward.bigwig" +fi +if [ -z ${VIASH_PAR_BIGWIG_REVERSE+x} ]; then + VIASH_PAR_BIGWIG_REVERSE="bigwig/\$id.reverse.bigwig" +fi +if [ -z ${VIASH_PAR_PRESEQ_OUTPUT+x} ]; then + VIASH_PAR_PRESEQ_OUTPUT="preseq/\$id.lc_extrap.txt" +fi +if [ -z ${VIASH_PAR_BAMSTAT_OUTPUT+x} ]; then + VIASH_PAR_BAMSTAT_OUTPUT="RSeQC/bamstat/\$id.mapping_quality.txt" +fi +if [ -z ${VIASH_PAR_STRANDEDNESS_OUTPUT+x} ]; then + VIASH_PAR_STRANDEDNESS_OUTPUT="RSeQC/inferexperiment/\$id.strandedness.txt" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_STATS+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_STATS="RSeQC/innerdistance/\$id.inner_distance.stats" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_DIST+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_DIST="RSeQC/innerdistance/txt/\$id.inner_distance.txt" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_FREQ+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_FREQ="RSeQC/innerdistance/txt/\$id.inner_distance_freq.txt" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_PLOT+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_PLOT="RSeQC/innerdistance/pdf/\$id.inner_distance_plot.pdf" +fi +if [ -z ${VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R="RSeQC/innerdistance/rscript/\$id.inner_distance_plot.r" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG="RSeQC/junctionannotation/log/\$id.junction_annotation.log" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R="RSeQC/junctionannotation/rscript/\$id.junction_annotation_plot.r" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED="RSeQC/junctionannotation/bed/\$id.junction_annotation.bed" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT="RSeQC/junctionannotation/bed/\$id.junction_annotation.Interact.bed" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET="RSeQC/junctionannotation/xls/\$id.junction_annotation.xls" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT="RSeQC/junctionannotation/pdf/\$id.splice_events.pdf" +fi +if [ -z ${VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT+x} ]; then + VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT="RSeQC/junctionannotation/pdf/\$id.splice_junctions_plot.pdf" +fi +if [ -z ${VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R+x} ]; then + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R="RSeQC/junctionsaturation/rscript/\$id.junction_saturation_plot.r" +fi +if [ -z ${VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT+x} ]; then + VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT="RSeQC/junctionsaturation/pdf/\$id.junction_saturation_plot.pdf" +fi +if [ -z ${VIASH_PAR_READ_DISTRIBUTION_OUTPUT+x} ]; then + VIASH_PAR_READ_DISTRIBUTION_OUTPUT="RSeQC/readdistribution/\$id.read_distribution.txt" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R="RSeQC/readduplication/rscrpt/\$id.duplication_rate_plot.r" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT="RSeQC/readduplication/pdf/\$id.duplication_rate_plot.pdf" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING="RSeQC/readduplication/xls/\$id.duplication_rate_mapping.xls" +fi +if [ -z ${VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE+x} ]; then + VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE="RSeQC/readduplication/xls/\$id.duplication_rate_sequencing.xls" +fi +if [ -z ${VIASH_PAR_TIN_OUTPUT_SUMMARY+x} ]; then + VIASH_PAR_TIN_OUTPUT_SUMMARY="RSeQC/tin/txt/\$id.tin_summary.txt" +fi +if [ -z ${VIASH_PAR_TIN_OUTPUT_METRICS+x} ]; then + VIASH_PAR_TIN_OUTPUT_METRICS="RSeQC/tin/xls/\$id.tin.xls" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX="dupradar/gene_data/\$id.dup_matrix.txt" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC="dupradar/mqc_intercept/\$id.dup_intercept_mqc.txt" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT="dupradar/box_plot/\$id.duprate_exp_boxplot.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT="dupradar/scatter_plot/\$id.duprate_exp_densityplot.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC="dupradar/density_curve/\$id.duprate_exp_density_curve_mqc.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM="dupradar/histogram/\$id.expression_hist.pdf" +fi +if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE+x} ]; then + VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE="dupradar/intercept_slope/\$id.intercept_slope.txt" +fi +if [ -z ${VIASH_PAR_QUALIMAP_QC_REPORT+x} ]; then + VIASH_PAR_QUALIMAP_QC_REPORT="Qualimap/\$id.rnaseq_qc_results.txt" +fi +if [ -z ${VIASH_PAR_QUALIMAP_COUNTS+x} ]; then + VIASH_PAR_QUALIMAP_COUNTS="Qualimap/\$id.counts.txt" +fi +if [ -z ${VIASH_PAR_QUALIMAP_REPORT+x} ]; then + VIASH_PAR_QUALIMAP_REPORT="Qualimap/\$id.report.html" +fi +if [ -z ${VIASH_PAR_DESEQ2_OUTPUT+x} ]; then + VIASH_PAR_DESEQ2_OUTPUT="deseq2_qc" +fi +if [ -z ${VIASH_PAR_DESEQ2_OUTPUT_PSEUDO+x} ]; then + VIASH_PAR_DESEQ2_OUTPUT_PSEUDO="deseq2_qc_pseudo" +fi +if [ -z ${VIASH_PAR_MULTIQC_REPORT+x} ]; then + VIASH_PAR_MULTIQC_REPORT="multiqc/multiqc_report.html" +fi +if [ -z ${VIASH_PAR_MULTIQC_DATA+x} ]; then + VIASH_PAR_MULTIQC_DATA="multiqc/multiqc_data" +fi +if [ -z ${VIASH_PAR_MULTIQC_PLOTS+x} ]; then + VIASH_PAR_MULTIQC_PLOTS="multiqc/multiqc_plots" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_GENE+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_GENE="pseudo_alignment_quantification/gene_counts.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED="pseudo_alignment_quantification/gene_counts_length_scaled.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED="pseudo_alignment_quantification/gene_counts_scaled.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_TPM_GENE+x} ]; then + VIASH_PAR_PSEUDO_TPM_GENE="pseudo_alignment_quantification/gene_tpm.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_TPM_TRANSCRIPT+x} ]; then + VIASH_PAR_PSEUDO_TPM_TRANSCRIPT="pseudo_alignment_quantification/transcript_tpm.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT+x} ]; then + VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT="pseudo_alignment_quantification/transcript_counts.tsv" +fi +if [ -z ${VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT+x} ]; then + VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT="pseudo_alignment_quantification/quant_merged_summarizedexperiment" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_FASTQ_1; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_FASTA" ] && [ ! -e "$VIASH_PAR_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then + ViashError "Input file '$VIASH_PAR_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GFF" ] && [ ! -e "$VIASH_PAR_GFF" ]; then + ViashError "Input file '$VIASH_PAR_GFF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ADDITIONAL_FASTA" ] && [ ! -e "$VIASH_PAR_ADDITIONAL_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_ADDITIONAL_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPT_FASTA" ] && [ ! -e "$VIASH_PAR_TRANSCRIPT_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENE_BED" ] && [ ! -e "$VIASH_PAR_GENE_BED" ]; then + ViashError "Input file '$VIASH_PAR_GENE_BED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SPLICESITES" ] && [ ! -e "$VIASH_PAR_SPLICESITES" ]; then + ViashError "Input file '$VIASH_PAR_SPLICESITES' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STAR_INDEX" ] && [ ! -e "$VIASH_PAR_STAR_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_STAR_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_INDEX" ] && [ ! -e "$VIASH_PAR_RSEM_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_RSEM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_INDEX" ] && [ ! -e "$VIASH_PAR_SALMON_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_SALMON_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ] && [ ! -e "$VIASH_PAR_KALLISTO_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_KALLISTO_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_BBSPLIT_FASTA_LIST; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ] && [ ! -e "$VIASH_PAR_BBSPLIT_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_BBSPLIT_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && [ ! -e "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then + ViashError "Input file '$VIASH_PAR_RIBO_DATABASE_MANIFEST' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ] && [ ! -e "$VIASH_PAR_MULTIQC_CUSTOM_CONFIG" ]; then + ViashError "Input file '$VIASH_PAR_MULTIQC_CUSTOM_CONFIG' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ] && [ ! -e "$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION" ]; then + ViashError "Input file '$VIASH_PAR_MULTIQC_METHODS_DESCRIPTION' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_GENCODE" ]]; then + if ! [[ "$VIASH_PAR_GENCODE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--gencode' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_TRIMMED_READS" ]]; then + if ! [[ "$VIASH_PAR_MIN_TRIMMED_READS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_trimmed_reads' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_REMOVE_RIBO_RNA" ]]; then + if ! [[ "$VIASH_PAR_REMOVE_RIBO_RNA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--remove_ribo_rna' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WITH_UMI" ]]; then + if ! [[ "$VIASH_PAR_WITH_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--with_umi' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_UMI_DISCARD_READ" ]]; then + if ! [[ "$VIASH_PAR_UMI_DISCARD_READ" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--umi_discard_read' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_UMI_DEDUP_STATS" ]]; then + if ! [[ "$VIASH_PAR_UMI_DEDUP_STATS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--umi_dedup_stats' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ]]; then + if ! [[ "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--pseudo_aligner_kmer_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ]]; then + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ]]; then + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length_sd' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_BAM_CSI_INDEX" ]]; then + if ! [[ "$VIASH_PAR_BAM_CSI_INDEX" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--bam_csi_index' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_MAPPED_READS" ]]; then + if ! [[ "$VIASH_PAR_MIN_MAPPED_READS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_mapped_reads' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_STRINGTIE_IGNORE_GTF" ]]; then + if ! [[ "$VIASH_PAR_STRINGTIE_IGNORE_GTF" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--stringtie_ignore_gtf' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAVE_UNALIGNED" ]]; then + if ! [[ "$VIASH_PAR_SAVE_UNALIGNED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--save_unaligned' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SAVE_ALIGN_INTERMEDS" ]]; then + if ! [[ "$VIASH_PAR_SAVE_ALIGN_INTERMEDS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--save_align_intermeds' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_ALIGNMENT" ]]; then + if ! [[ "$VIASH_PAR_SKIP_ALIGNMENT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_alignment' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_PSEUDO_ALIGNMENT" ]]; then + if ! [[ "$VIASH_PAR_SKIP_PSEUDO_ALIGNMENT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_pseudo_alignment' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_FASTQC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_FASTQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_fastqc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_TRIMMING" ]]; then + if ! [[ "$VIASH_PAR_SKIP_TRIMMING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_trimming' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_UMI_EXTRACT" ]]; then + if ! [[ "$VIASH_PAR_SKIP_UMI_EXTRACT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_umi_extract' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_QC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_qc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_MARKDUPLICATES" ]]; then + if ! [[ "$VIASH_PAR_SKIP_MARKDUPLICATES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_markduplicates' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_STRINGTIE" ]]; then + if ! [[ "$VIASH_PAR_SKIP_STRINGTIE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_stringtie' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_BIOTYPE_QC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_BIOTYPE_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_biotype_qc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_BIGWIG" ]]; then + if ! [[ "$VIASH_PAR_SKIP_BIGWIG" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_bigwig' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_PRESEQ" ]]; then + if ! [[ "$VIASH_PAR_SKIP_PRESEQ" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_preseq' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_DUPRADAR" ]]; then + if ! [[ "$VIASH_PAR_SKIP_DUPRADAR" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_dupradar' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_QUALIMAP" ]]; then + if ! [[ "$VIASH_PAR_SKIP_QUALIMAP" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_qualimap' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_RSEQC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_RSEQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_rseqc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SKIP_MULTIQC" ]]; then + if ! [[ "$VIASH_PAR_SKIP_MULTIQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--skip_multiqc' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DESEQ2_VST" ]]; then + if ! [[ "$VIASH_PAR_DESEQ2_VST" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--deseq2_vst' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_STRANDEDNESS" ]; then + VIASH_PAR_STRANDEDNESS_CHOICES=("unstranded;forward;reverse;auto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_STRANDEDNESS_CHOICES[*]};" =~ ";$VIASH_PAR_STRANDEDNESS;" ]]; then + ViashError '--strandedness' specified value of \'$VIASH_PAR_STRANDEDNESS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_TRIMMER" ]; then + VIASH_PAR_TRIMMER_CHOICES=("trimgalore;fastp") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_TRIMMER_CHOICES[*]};" =~ ";$VIASH_PAR_TRIMMER;" ]]; then + ViashError '--trimmer' specified value of \'$VIASH_PAR_TRIMMER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ]; then + VIASH_PAR_UMITOOLS_EXTRACT_METHOD_CHOICES=("string;regex") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_UMITOOLS_EXTRACT_METHOD_CHOICES[*]};" =~ ";$VIASH_PAR_UMITOOLS_EXTRACT_METHOD;" ]]; then + ViashError '--umitools_extract_method' specified value of \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_UMI_DISCARD_READ" ]; then + VIASH_PAR_UMI_DISCARD_READ_CHOICES=("0;1;2") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_UMI_DISCARD_READ_CHOICES[*]};" =~ ";$VIASH_PAR_UMI_DISCARD_READ;" ]]; then + ViashError '--umi_discard_read' specified value of \'$VIASH_PAR_UMI_DISCARD_READ\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ]; then + VIASH_PAR_UMITOOLS_GROUPING_METHOD_CHOICES=("unique;percentile;cluster;adjacency;directional") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_UMITOOLS_GROUPING_METHOD_CHOICES[*]};" =~ ";$VIASH_PAR_UMITOOLS_GROUPING_METHOD;" ]]; then + ViashError '--umitools_grouping_method' specified value of \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_ALIGNER" ]; then + VIASH_PAR_ALIGNER_CHOICES=("star_salmon;star_rsem;hisat2") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_ALIGNER_CHOICES[*]};" =~ ";$VIASH_PAR_ALIGNER;" ]]; then + ViashError '--aligner' specified value of \'$VIASH_PAR_ALIGNER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_PSEUDO_ALIGNER" ]; then + VIASH_PAR_PSEUDO_ALIGNER_CHOICES=("salmon;kallisto") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_PSEUDO_ALIGNER_CHOICES[*]};" =~ ";$VIASH_PAR_PSEUDO_ALIGNER;" ]]; then + ViashError '--pseudo_aligner' specified value of \'$VIASH_PAR_PSEUDO_ALIGNER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_RSEQC_MODULES" ]; then + VIASH_PAR_RSEQC_MODULES_CHOICES=("bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication;tin") + IFS=';' + set -f + for val in $VIASH_PAR_RSEQC_MODULES; do + if ! [[ ";${VIASH_PAR_RSEQC_MODULES_CHOICES[*]};" =~ ";${val};" ]]; then + ViashError '--rseqc_modules' specified value of \'${val}\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FASTA")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FASTA")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_GTF")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_GENE_BED" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_GENE_BED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_GENE_BED")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_STAR_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_STAR_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_STAR_INDEX")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SALMON_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_SALMON_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_SALMON_INDEX")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BBSPLIT_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_BBSPLIT_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BBSPLIT_INDEX")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_KALLISTO_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_KALLISTO_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_KALLISTO_INDEX")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_FASTQ_1" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FASTQ_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FASTQ_1")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_FASTQ_2" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FASTQ_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FASTQ_2")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_HTML_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_HTML_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_HTML_1")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_HTML_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_HTML_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_HTML_2")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_ZIP_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_ZIP_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_ZIP_1")" +fi +if [ ! -z "$VIASH_PAR_FASTQC_ZIP_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_ZIP_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTQC_ZIP_2")" +fi +if [ ! -z "$VIASH_PAR_TRIM_HTML_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_HTML_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_HTML_1")" +fi +if [ ! -z "$VIASH_PAR_TRIM_HTML_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_HTML_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_HTML_2")" +fi +if [ ! -z "$VIASH_PAR_TRIM_ZIP_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_ZIP_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_ZIP_1")" +fi +if [ ! -z "$VIASH_PAR_TRIM_ZIP_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_ZIP_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_ZIP_2")" +fi +if [ ! -z "$VIASH_PAR_TRIM_LOG_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_LOG_1")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_LOG_1")" +fi +if [ ! -z "$VIASH_PAR_TRIM_LOG_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIM_LOG_2")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRIM_LOG_2")" +fi +if [ ! -z "$VIASH_PAR_FASTP_TRIM_JSON" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTP_TRIM_JSON")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTP_TRIM_JSON")" +fi +if [ ! -z "$VIASH_PAR_FASTP_TRIM_HTML" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTP_TRIM_HTML")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FASTP_TRIM_HTML")" +fi +if [ ! -z "$VIASH_PAR_SORTMERNA_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_SORTMERNA_LOG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SORTMERNA_LOG")" +fi +if [ ! -z "$VIASH_PAR_STAR_ALIGNMENT" ] && [ ! -d "$(dirname "$VIASH_PAR_STAR_ALIGNMENT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STAR_ALIGNMENT")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_SORTED" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_SORTED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_SORTED")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_INDEX")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX")" +fi +if [ ! -z "$VIASH_PAR_STAR_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_STAR_LOG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STAR_LOG")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_STATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_STATS")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_FLAGSTAT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_FLAGSTAT")" +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && [ ! -d "$(dirname "$VIASH_PAR_GENOME_BAM_IDXSTATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_GENOME_BAM_IDXSTATS")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT")" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && [ ! -d "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS")" +fi +if [ ! -z "$VIASH_PAR_SALMON_QUANT_RESULTS" ] && [ ! -d "$(dirname "$VIASH_PAR_SALMON_QUANT_RESULTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SALMON_QUANT_RESULTS")" +fi +if [ ! -z "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && [ ! -d "$(dirname "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_QUANT_RESULTS" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_QUANT_RESULTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_QUANT_RESULTS")" +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_RSEM_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_RSEM_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ] && [ ! -d "$(dirname "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS")" +fi +if [ ! -z "$VIASH_PAR_BAM_STAR_RSEM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_STAR_RSEM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAM_STAR_RSEM")" +fi +if [ ! -z "$VIASH_PAR_BAM_GENOME_RSEM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_GENOME_RSEM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAM_GENOME_RSEM")" +fi +if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_TRANSCRIPT_RSEM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAM_TRANSCRIPT_RSEM")" +fi +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE_SCALED")" +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TPM_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" +fi +if [ ! -z "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && [ ! -d "$(dirname "$VIASH_PAR_MARKDUPLICATES_METRICS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MARKDUPLICATES_METRICS")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_COVERAGE_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_COVERAGE_GTF")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_ABUNDANCE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_ABUNDANCE")" +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && [ ! -d "$(dirname "$VIASH_PAR_STRINGTIE_BALLGOWN")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRINGTIE_BALLGOWN")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_SUMMARY")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ] && [ ! -d "$(dirname "$VIASH_PAR_BEDGRAPH_FORWARD")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BEDGRAPH_FORWARD")" +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ] && [ ! -d "$(dirname "$VIASH_PAR_BEDGRAPH_REVERSE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BEDGRAPH_REVERSE")" +fi +if [ ! -z "$VIASH_PAR_BIGWIG_FORWARD" ] && [ ! -d "$(dirname "$VIASH_PAR_BIGWIG_FORWARD")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BIGWIG_FORWARD")" +fi +if [ ! -z "$VIASH_PAR_BIGWIG_REVERSE" ] && [ ! -d "$(dirname "$VIASH_PAR_BIGWIG_REVERSE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BIGWIG_REVERSE")" +fi +if [ ! -z "$VIASH_PAR_PRESEQ_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_PRESEQ_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PRESEQ_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_BAMSTAT_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_BAMSTAT_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAMSTAT_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_STRANDEDNESS_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_STRANDEDNESS_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_STATS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_STATS")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_DIST" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_DIST")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_DIST")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_FREQ")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT")" +fi +if [ ! -z "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_INNER_DIST_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT")" +fi +if [ ! -z "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING")" +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && [ ! -d "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE")" +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_TIN_OUTPUT_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TIN_OUTPUT_SUMMARY")" +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && [ ! -d "$(dirname "$VIASH_PAR_TIN_OUTPUT_METRICS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_TIN_OUTPUT_METRICS")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM")" +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && [ ! -d "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO")" +fi +if [ ! -z "$VIASH_PAR_MULTIQC_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_MULTIQC_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MULTIQC_REPORT")" +fi +if [ ! -z "$VIASH_PAR_MULTIQC_DATA" ] && [ ! -d "$(dirname "$VIASH_PAR_MULTIQC_DATA")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MULTIQC_DATA")" +fi +if [ ! -z "$VIASH_PAR_MULTIQC_PLOTS" ] && [ ! -d "$(dirname "$VIASH_PAR_MULTIQC_PLOTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MULTIQC_PLOTS")" +fi +if [ ! -z "$VIASH_PAR_MULTIQC_VERSIONS" ] && [ ! -d "$(dirname "$VIASH_PAR_MULTIQC_VERSIONS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MULTIQC_VERSIONS")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_GENE" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_TPM_GENE")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_TPM_GENE")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT")" +fi +if [ ! -z "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -d "$(dirname "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + + +# set dependency paths +VIASH_DEP_WORKFLOWS_PREPARE_GENOME="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/prepare_genome/main.nf" +VIASH_DEP_CAT_FASTQ="$VIASH_META_RESOURCES_DIR/../../../nextflow/cat_fastq/main.nf" +VIASH_DEP_WORKFLOWS_PRE_PROCESSING="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/pre_processing/main.nf" +VIASH_DEP_WORKFLOWS_GENOME_ALIGNMENT_AND_QUANT="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/genome_alignment_and_quant/main.nf" +VIASH_DEP_WORKFLOWS_PSEUDO_ALIGNMENT_AND_QUANT="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/pseudo_alignment_and_quant/main.nf" +VIASH_DEP_WORKFLOWS_POST_PROCESSING="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/post_processing/main.nf" +VIASH_DEP_WORKFLOWS_QUALITY_CONTROL="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/quality_control/main.nf" + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rnaseq-XXXXXX").nf +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +//// VIASH START +// The following code has been auto-generated by Viash. + +//// VIASH END +workflow run_wf { + take: + input_ch + + main: + + reference_ch = input_ch + + | map { id, state -> + def biotype = state.gencode ? "gene_type" : state.featurecounts_group_type + def filter_gtf = + ( + ( !state.skip_alignment && state.aligner) // Condition 1: Alignment is required and aligner is set + || ( !state.skip_pseudo_alignment && state.pseudo_aligner ) // Condition 2: Pseudoalignment is required and pseudoaligner is set + || ( !state.transcript_fasta ) // Condition 3: Transcript FASTA file is not provided + ) + && + ( !state.skip_gtf_filter ) // Condition 4: --skip_gtf_filter is not provided + [ id, state + [ biotype: biotype, filter_gtf: filter_gtf ] ] + } + + | toSortedList + + | map { list -> + [ "ref", [ + fasta: list.collect { id, state -> state.fasta }.unique()[0], + gtf: list.collect { id, state -> state.gtf }.unique()[0], + gff: list.collect { id, state -> state.gff }.unique()[0], + additional_fasta: list.collect { id, state -> state.additional_fasta }.unique()[0], + transcript_fasta:list.collect { id, state -> state.transcript_fasta }.unique()[0], + gene_bed: list.collect { id, state -> state.gene_bed }.unique()[0], + bbsplit_fasta_list: list.collect { id, state -> state.bbsplit_fasta_list }.unique()[0], + aligner: list.collect { id, state -> state.aligner }.unique()[0], + pseudo_aligner: list.collect { id, state -> state.pseudo_aligner }.unique()[0], + star_index: list.collect { id, state -> state.star_index }.unique()[0], + rsem_index: list.collect { id, state -> state.rsem_index }.unique()[0], + salmon_index: list.collect { id, state -> state.salmon_index }.unique()[0], + kallisto_index: list.collect { id, state -> state.kallisto_index }.unique()[0], + // splicesites: list.collect { id, state -> state.splicesites }.unique()[0], + // hisat2_index: list.collect { id, state -> state.hisat2_index }.unique()[0], + bbsplit_index: list.collect { id, state -> state.bbsplit_index }.unique()[0], + // See: + skip_bbsplit: true, // list.collect { id, state -> state.skip_bbsplit }.unique()[0], + skip_alignment: list.collect { id, state -> state.skip_alignment }.unique()[0], + gencode: list.collect { id, state -> state.gencode }.unique()[0], + biotype: list.collect { id, state -> state.biotype }.unique()[0], + star_sjdb_gtf_feature_exon: list.collect { id, state -> state.star_sjdb_gtf_feature_exon }.unique()[0], + filter_gtf: list.collect { id, state -> state.filter_gtf }.unique()[0], + pseudo_aligner_kmer_size: list.collect { id, state -> state.pseudo_aligner_kmer_size }.unique()[0] ] + ] + } + + // prepare all the necessary files for reference genome + | prepare_genome.run ( + fromState: [ + "fasta": "fasta", + "gtf": "gtf", + "gff": "gff", + "additional_fasta": "additional_fasta", + "transcript_fasta": "transcript_fasta", + "gene_bed": "gene_bed", + "bbsplit_fasta_list": "bbsplit_fasta_list", + "star_index": "star_index", + "rsem_index": "rsem_index", + "salmon_index": "salmon_index", + "kallisto_index": "kallisto_index", + "pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size", + // "splicesites": "splicesites", + // "hisat2_index": "hisat2_index", + "bbsplit_index": "bbsplit_index", + "skip_bbsplit": "skip_bbsplit", + "gencode": "gencode", + "biotype": "biotype", + "filter_gtf": "filter_gtf", + "aligner": "aligner", + "pseudo_aligner": "pseudo_aligner", + "skip_alignment": "skip_alignment", + "star_sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ + "fasta": "uncompressed_fasta", + "gtf": "gtf_uncompressed", + "transcript_fasta": "transcript_fasta_uncompressed", + "fai": "fai", + "chrom_sizes": "chrom_sizes", + "bbsplit_index": "bbsplit_index_uncompressed", + "star_index": "star_index_uncompressed", + "salmon_index": "salmon_index_uncompressed", + "kallisto_index": "kallisto_index_uncompressed", + "gene_bed": "gene_bed_uncompressed" + ] + ) + + // Check if contigs in genome fasta file > 512 Mbp + | map { id, state -> + (isBelowMaxContigSize(state.fai)) ? [id, state] : [id, state + [bam_csi_index: true]] + } + + // Pick out the state + | map { list -> list[1]} + + analysis_ch = input_ch + + | combine(reference_ch) + + | map { list -> [list[0], list[1] + list[2]] } + + // Concatenate FastQ files from same sample if required + | cat_fastq.run ( + fromState: [ + "read_1": "fastq_1", + "read_2": "fastq_2" + ], + toState: [ + "fastq_1": "fastq_1", + "fastq_2": "fastq_2" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // Pre-process fastq files + | pre_processing.run ( + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "umitools_bc_pattern": "umitools_bc_pattern", + "umitools_bc_pattern2": "umitools_bc_pattern2", + "strandedness": "strandedness", + "transcript_fasta": "transcript_fasta", + "gtf": "gtf", + "with_umi": "with_umi", + "bbsplit_index": "bbsplit_index", + "bbsplit_fasta_list": "bbsplit_fasta_list", + "bc_pattern": "bc_pattern", + "ribo_database_manifest": "ribo_database_manifest", + "salmon_index": "salmon_index", + "skip_qc": "skip_qc", + "skip_fastqc": "skip_fastqc", + "skip_skip_umi_extract": "skip_umi_extract", + "umi_discard_read": "umi_discard_read", + "skip_trimming": "skip_trimming", + "trimmer": "trimmer", + "skip_bbsplit": "skip_bbsplit", + "remove_ribo_rna": "remove_ribo_rna" + ], + toState: [ + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "fastq_1": "qc_output1", + "fastq_2": "qc_output2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "passed_trimmed_reads": "passed_trimmed_reads", + "num_trimmed_reads": "num_trimmed_reads", + "sortmerna_log": "sortmerna_log", + "salmon_quant_output": "salmon_quant_output", + "fastp_failed_trim": "failed_trim", + "fastp_failed_trim_unpaired1": "failed_trim_unpaired1", + "fastp_failed_trim_unpaired2": "failed_trim_unpaired2", + "fastp_trim_json": "trim_json", + "fastp_trim_html": "trim_html", + "fastp_trim_merged_out": "trim_merged_out" + ] + ) + + // Infer strandedness from Salmon pseudo-alignment results + | map { id, state -> + (state.strandedness == 'auto') ? + [ id, state + [strandedness: getSalmonInferredStrandedness(state.salmon_quant_output)] ] : + [id, state] + } + + // Filter FastQ files based on minimum trimmed read count after adapter trimming + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def num_reads = (state.skip_trimming) ? + state.min_trimmed_reads + 1 : + ( + (state.trimmer == "fastp") ? + getFastpReadsAfterFiltering(state.fastp_trim_json) : + ( + (!state.skip_trimming && input.size() == 2) ? + getTrimGaloreReadsAfterFiltering(state.trim_log_2) : + getTrimGaloreReadsAfterFiltering(state.trim_log_1) + ) + ) + def passed_trimmed_reads = + (state.skip_trimming || (num_reads >= state.min_trimmed_reads)) ? + true : + false + [ id, state + [num_trimmed_reads: num_reads, passed_trimmed_reads: passed_trimmed_reads] ] + } + + // Genome alignment and quantification + | genome_alignment_and_quant.run ( + runIf: { id, state -> !state.skip_alignment && state.passed_trimmed_reads }, + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "strandedness": "strandedness", + "gtf": "gtf", + "transcript_fasta": "transcript_fasta", + "bam_csi_index": "bam_csi_index", + "aligner": "aligner", + "rsem_index": "rsem_index", + "star_index": "star_index", + "star_sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon", + "star_ignore_sjdbgtf": "star_ignore_sjdbgtf", + "with_umi": "with_umi", + "umi_dedup_stats": "umi_dedup_stats", + "gtf_group_features": "gtf_group_features", + "gtf_extra_attributes": "gtf_extra_attributes", + "salmon_quant_libtype": "salmon_quant_libtype", + "salmon_index": "salmon_index", + "extra_rsem_calculate_expression_args": "extra_rsem_calculate_expression_args" + ], + toState: [ + "star_multiqc": "star_multiqc", + "rsem_multiqc": "rsem_multiqc", + "salmon_multiqc": "salmon_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "quant_out_dir": "quant_out_dir", + "quant_results_file": "quant_results_file", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "bam_genome_rsem": "bam_genome_rsem", + "bam_transcript_rsem": "bam_transcript_rsem" + ] + ) + + // Filter channels to get samples that passed STAR minimum mapping percentage + | map { id, state -> + def percent_mapped = (!state.skip_alignment && state.passed_trimmed_reads) ? getStarPercentMapped(state.star_multiqc) : 0.0 + def passed_mapping = (percent_mapped >= state.min_mapped_reads) ? true : false + [ id, state + [percent_mapped: percent_mapped, passed_mapping: passed_mapping] ] + } + + // Pseudo-alignment and quantification + | pseudo_alignment_and_quant.run ( + runIf: { id, state -> !state.skip_pseudo_alignment && state.passed_trimmed_reads }, + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "strandedness": "strandedness", + "gtf": "gtf", + "transcript_fasta": "transcript_fasta", + "pseudo_aligner": "pseudo_aligner", + "salmon_index": "salmon_index", + "kallisto_index": "kallisto_index", + "extra_star_align_args": "extra_star_align_args", + "star_ignore_sjdbgtf": "star_ignore_sjdbgtf", + "seq_platform": "seq_platform", + "seq_center": "seq_center", + "with_umi": "with_umi", + "umi_dedup_stats": "umi_dedup_stats", + "gtf_group_features": "gtf_group_features", + "gtf_extra_attributes": "gtf_extra_attributes", + "lib_type": "salmon_quant_libtype", + "kallisto_quant_fragment_length": "kallisto_quant_fragment_length", + "kallisto_quant_fragment_length_sd": "kallisto_quant_fragment_length_sd" + ], + toState: [ + "pseudo_quant_out_dir": "quant_out_dir", + "pseudo_salmon_quant_results_file": "salmon_quant_results_file", + "pseudo_kallisto_quant_results_file": "kallisto_quant_results_file", + "pseudo_multiqc": "pseudo_multiqc" + ] + ) + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired ] ] + } + + // Post-processing + | post_processing.run ( + runIf: { id, state -> !state.skip_alignment && state.passed_trimmed_reads && state.passed_mapping }, + fromState: [ + "id": "id", + "paired": "paired", + "strandedness": "strandedness", + "fasta": "fasta", + "fai": "fai", + "gtf": "gtf", + "genome_bam": "genome_bam_sorted", + "chrom_sizes": "chrom_sizes", + "star_multiqc": "star_multiqc", + "extra_picard_args": "extra_picard_args", + "extra_stringtie_args": "extra_stringtie_args", + "stringtie_ignore_gtf": "stringtie_ignore_gtf", + "extra_bedtools_args": "extra_bedtools_args", + "bam_csi_index": "bam_csi_index", + "min_mapped_reads": "min_mapped_reads", + "with_umi": "with_umi", + "skip_qc": "skip_qc", + "skip_markduplicates": "skip_markduplicates", + "skip_stringtie": "skip_stringtie", + "skip_bigwig":"gencode" + ], + toState: [ + "genome_bam_sorted": "processed_genome_bam", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse" + ] + ) + + // Final QC + + // Temporarily skip DESeq2 QC for now + // https://github.com/viash-hub/rnaseq/issues/31 + | map{ id, state -> [ id, state + [ skip_deseq2_qc: true ] ] } + + | quality_control.run ( + fromState: [ + "id": "id", + "paired": "paired", + "strandedness": "strandedness", + "skip_align": "skip_alignment", + "skip_pseudo_align": "skip_pseudo_alignment", + "skip_dupradar": "skip_dupradar", + "skip_qualimap": "skip_qualimap", + "skip_rseqc": "skip_rseqc", + "skip_multiqc": "skip_multiqc", + "skip_preseq": "skip_preseq", + "gtf": "gtf", + "num_trimmed_reads": "num_trimmed_reads", + "passed_trimmed_reads": "passed_trimmed_reads", + "passed_mapping": "passed_mapping", + "percent_mapped": "percent_mapped", + "genome_bam": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "salmon_multiqc": "salmon_multiqc", + "quant_results_file": "quant_results_file", + "rsem_multiqc": "rsem_multiqc", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "pseudo_multiqc": "pseudo_multiqc", + "pseudo_quant_out_dir": "pseudo_quant_out_dir", + "pseudo_salmon_quant_results_file": "pseudo_salmon_quant_results_file", + "pseudo_kallisto_quant_results_file": "pseudo_kallisto_quant_results_file", + "aligner": "aligner", + "pseudo_aligner": "pseudo_aligner", + "gene_bed": "gene_bed", + "extra_preseq_args": "extra_preseq_args", + "biotype": "biotype", + "skip_biotype_qc": "skip_biotype_qc", + "featurecounts_group_type": "featurecounts_group_type", + "featurecounts_feature_type": "featurecounts_feature_type", + "gencode": "gencode", + "skip_deseq2_qc": "skip_deseq2_qc", + "deseq2_vst": "deseq2_vst", + "multiqc_custom_config": "multiqc_custom_config", + "multiqc_title": "multiqc_title", + "multiqc_methods_description": "multiqc_methods_description", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "sortmerna_multiqc": "sortmerna_log", + "star_multiqc": "star_multiqc", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_multiqc": "markduplicates_metrics", + "rseqc_modules": "rseqc_modules" + ], + toState: [ + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ + "output_fasta": "fasta", + "output_gtf": "gtf", + "output_transcript_fasta": "transcript_fasta", + "output_gene_bed": "gene_bed", + "output_bbsplit_index": "bbsplit_index", + "output_star_index": "star_index", + "output_salmon_index": "salmon_index", + "output_kallisto_index": "kallisto_index", + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "output_fastq_1": "fastq_1", + "output_fastq_2": "fastq_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "sortmerna_log": "sortmerna_log", + "star_log": "star_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "salmon_quant_results": "quant_out_dir", + "pseudo_quant_results": "pseudo_quant_out_dir", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse", + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + emit: + analysis_ch +} + +import nextflow.Nextflow +// +// Function to generate an error if contigs in genome fasta file > 512 Mbp +// +def isBelowMaxContigSize(fai_file) { + def max_size = 512000000 + fai_file.eachLine { line -> + def lspl = line.split('\\t') + def chrom = lspl[0] + def size = lspl[1] + if (size.toInteger() > max_size) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\\n" + + " Contig longer than \${max_size}bp found in reference genome!\\n\\n" + + " \${chrom}: \${size}\\n\\n" + + " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\\n\\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + return false + } + } + return true +} + +// Find a file in a directory +import java.nio.file.* +def findFile(Path dir, String fileName) { + Files.walk(dir) + .filter { Files.isRegularFile(it) && it.fileName.toString() == fileName } + .findFirst() + .orElse(null) +} + +import groovy.json.JsonSlurper +// +// Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness +// +def getSalmonInferredStrandedness(salmon_quant_output) { + def json_file = findFile(salmon_quant_output, 'meta_info.json') + def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0] + def strandedness = 'reverse' + if (lib_type) { + if (lib_type in ['U', 'IU']) { + strandedness = 'unstranded' + } + else if (lib_type in ['SF', 'ISF']) { + strandedness = 'forward' + } + else if (lib_type in ['SR', 'ISR']) { + strandedness = 'reverse' + } + } + return strandedness +} + +// +// Function that parses TrimGalore log output file to get total number of reads after trimming +// +def getTrimGaloreReadsAfterFiltering(log_file) { + def total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\\d\\.]+)\\ssequences processed in total/ + def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\\s([\\d\\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() + } + return total_reads - filtered_reads +} + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +// +// Function that parses and returns the alignment rate from the STAR log outputs +// +def getStarPercentMapped(align_log) { + def percent_aligned = 0 + def pattern = /Uniquely mapped reads %\\s*\\|\\s*([\\d\\.]+)%/ + align_log.eachLine { line -> + def matcher = line =~ pattern + if (matcher) { + percent_aligned = matcher[0][1].toFloat() + } + } + return percent_aligned +} +VIASHMAIN +nextflow run . -main-script "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ] && [ ! -e "$VIASH_PAR_OUTPUT_FASTA" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ] && [ ! -e "$VIASH_PAR_OUTPUT_GTF" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA" ] && [ ! -e "$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_TRANSCRIPT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_GENE_BED" ] && [ ! -e "$VIASH_PAR_OUTPUT_GENE_BED" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_GENE_BED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_STAR_INDEX" ] && [ ! -e "$VIASH_PAR_OUTPUT_STAR_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_STAR_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SALMON_INDEX" ] && [ ! -e "$VIASH_PAR_OUTPUT_SALMON_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_SALMON_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BBSPLIT_INDEX" ] && [ ! -e "$VIASH_PAR_OUTPUT_BBSPLIT_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_BBSPLIT_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_KALLISTO_INDEX" ] && [ ! -e "$VIASH_PAR_OUTPUT_KALLISTO_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_KALLISTO_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FASTP_TRIM_JSON" ] && [ ! -e "$VIASH_PAR_FASTP_TRIM_JSON" ]; then + ViashError "Output file '$VIASH_PAR_FASTP_TRIM_JSON' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FASTP_TRIM_HTML" ] && [ ! -e "$VIASH_PAR_FASTP_TRIM_HTML" ]; then + ViashError "Output file '$VIASH_PAR_FASTP_TRIM_HTML' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STAR_ALIGNMENT" ] && [ ! -e "$VIASH_PAR_STAR_ALIGNMENT" ]; then + ViashError "Output file '$VIASH_PAR_STAR_ALIGNMENT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_SORTED" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_SORTED" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_SORTED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_INDEX" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STAR_LOG" ] && [ ! -e "$VIASH_PAR_STAR_LOG" ]; then + ViashError "Output file '$VIASH_PAR_STAR_LOG' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_STATS" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_STATS" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_STATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_FLAGSTAT" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_FLAGSTAT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOME_BAM_IDXSTATS" ] && [ ! -e "$VIASH_PAR_GENOME_BAM_IDXSTATS" ]; then + ViashError "Output file '$VIASH_PAR_GENOME_BAM_IDXSTATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_STATS" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_STATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_FLAGSTAT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS" ]; then + ViashError "Output file '$VIASH_PAR_TRANSCRIPTOME_BAM_IDXSTATS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_QUANT_RESULTS" ] && [ ! -e "$VIASH_PAR_SALMON_QUANT_RESULTS" ]; then + ViashError "Output file '$VIASH_PAR_SALMON_QUANT_RESULTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_SALMON_QUANT_RESULTS_FILE" ]; then + ViashError "Output file '$VIASH_PAR_SALMON_QUANT_RESULTS_FILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_QUANT_RESULTS" ] && [ ! -e "$VIASH_PAR_PSEUDO_QUANT_RESULTS" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_QUANT_RESULTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_RSEM_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_RSEM_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ] && [ ! -e "$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS" ]; then + ViashError "Output file '$VIASH_PAR_RSEM_COUNTS_TRANSCRIPTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAM_STAR_RSEM" ] && [ ! -e "$VIASH_PAR_BAM_STAR_RSEM" ]; then + ViashError "Output file '$VIASH_PAR_BAM_STAR_RSEM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAM_GENOME_RSEM" ] && [ ! -e "$VIASH_PAR_BAM_GENOME_RSEM" ]; then + ViashError "Output file '$VIASH_PAR_BAM_GENOME_RSEM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ] && [ ! -e "$VIASH_PAR_BAM_TRANSCRIPT_RSEM" ]; then + ViashError "Output file '$VIASH_PAR_BAM_TRANSCRIPT_RSEM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TPM_GENE" ] && [ ! -e "$VIASH_PAR_TPM_GENE" ]; then + ViashError "Output file '$VIASH_PAR_TPM_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_GENE_SCALED" ] && [ ! -e "$VIASH_PAR_COUNTS_GENE_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_GENE_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TPM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_TPM_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_TPM_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_COUNTS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -e "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ]; then + ViashError "Output file '$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MARKDUPLICATES_METRICS" ] && [ ! -e "$VIASH_PAR_MARKDUPLICATES_METRICS" ]; then + ViashError "Output file '$VIASH_PAR_MARKDUPLICATES_METRICS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ] && [ ! -e "$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_TRANSCRIPT_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ] && [ ! -e "$VIASH_PAR_STRINGTIE_COVERAGE_GTF" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_COVERAGE_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_ABUNDANCE" ] && [ ! -e "$VIASH_PAR_STRINGTIE_ABUNDANCE" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_ABUNDANCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRINGTIE_BALLGOWN" ] && [ ! -e "$VIASH_PAR_STRINGTIE_BALLGOWN" ]; then + ViashError "Output file '$VIASH_PAR_STRINGTIE_BALLGOWN' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS" ] && [ ! -e "$VIASH_PAR_FEATURECOUNTS" ]; then + ViashError "Output file '$VIASH_PAR_FEATURECOUNTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ] && [ ! -e "$VIASH_PAR_FEATURECOUNTS_SUMMARY" ]; then + ViashError "Output file '$VIASH_PAR_FEATURECOUNTS_SUMMARY' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_FORWARD" ] && [ ! -e "$VIASH_PAR_BEDGRAPH_FORWARD" ]; then + ViashError "Output file '$VIASH_PAR_BEDGRAPH_FORWARD' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BEDGRAPH_REVERSE" ] && [ ! -e "$VIASH_PAR_BEDGRAPH_REVERSE" ]; then + ViashError "Output file '$VIASH_PAR_BEDGRAPH_REVERSE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BIGWIG_FORWARD" ] && [ ! -e "$VIASH_PAR_BIGWIG_FORWARD" ]; then + ViashError "Output file '$VIASH_PAR_BIGWIG_FORWARD' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BIGWIG_REVERSE" ] && [ ! -e "$VIASH_PAR_BIGWIG_REVERSE" ]; then + ViashError "Output file '$VIASH_PAR_BIGWIG_REVERSE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PRESEQ_OUTPUT" ] && [ ! -e "$VIASH_PAR_PRESEQ_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_PRESEQ_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAMSTAT_OUTPUT" ] && [ ! -e "$VIASH_PAR_BAMSTAT_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_BAMSTAT_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_STRANDEDNESS_OUTPUT" ] && [ ! -e "$VIASH_PAR_STRANDEDNESS_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_STRANDEDNESS_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_LOG' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_BED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_INTERACT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_JUNCTION_SHEET' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_EVENTS_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ] && [ ! -e "$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_ANNOTATION_OUTPUT_SPLICE_JUNCTIONS_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ] && [ ! -e "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ] && [ ! -e "$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_JUNCTION_SATURATION_OUTPUT_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ] && [ ! -e "$VIASH_PAR_READ_DISTRIBUTION_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_READ_DISTRIBUTION_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT_R' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_PLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_MAPPING' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ] && [ ! -e "$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE" ]; then + ViashError "Output file '$VIASH_PAR_READ_DUPLICATION_OUTPUT_DUPLICATION_RATE_SEQUENCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ] && [ ! -e "$VIASH_PAR_TIN_OUTPUT_SUMMARY" ]; then + ViashError "Output file '$VIASH_PAR_TIN_OUTPUT_SUMMARY' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TIN_OUTPUT_METRICS" ] && [ ! -e "$VIASH_PAR_TIN_OUTPUT_METRICS" ]; then + ViashError "Output file '$VIASH_PAR_TIN_OUTPUT_METRICS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPMATRIX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_BOXPLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSPLOT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_EXPRESSION_HISTOGRAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -e "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ]; then + ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_QC_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_QC_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! -e "$VIASH_PAR_QUALIMAP_COUNTS" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_COUNTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! -e "$VIASH_PAR_DESEQ2_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_DESEQ2_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ] && [ ! -e "$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO" ]; then + ViashError "Output file '$VIASH_PAR_DESEQ2_OUTPUT_PSEUDO' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_REPORT" ] && [ ! -e "$VIASH_PAR_MULTIQC_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_MULTIQC_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_DATA" ] && [ ! -e "$VIASH_PAR_MULTIQC_DATA" ]; then + ViashError "Output file '$VIASH_PAR_MULTIQC_DATA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_PLOTS" ] && [ ! -e "$VIASH_PAR_MULTIQC_PLOTS" ]; then + ViashError "Output file '$VIASH_PAR_MULTIQC_PLOTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MULTIQC_VERSIONS" ] && [ ! -e "$VIASH_PAR_MULTIQC_VERSIONS" ]; then + ViashError "Output file '$VIASH_PAR_MULTIQC_VERSIONS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_GENE" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_GENE_LENGTH_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_GENE_SCALED' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_GENE" ] && [ ! -e "$VIASH_PAR_PSEUDO_TPM_GENE" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_TPM_GENE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_TPM_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_COUNTS_TRANSCRIPT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && [ ! -e "$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ]; then + ViashError "Output file '$VIASH_PAR_PSEUDO_QUANT_MERGED_SUMMARIZEDEXPERIMENT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/nextflow/bedtools_genomecov/.config.vsh.yaml b/target/nextflow/bedtools_genomecov/.config.vsh.yaml new file mode 100644 index 0000000..b5ba8db --- /dev/null +++ b/target/nextflow/bedtools_genomecov/.config.vsh.yaml @@ -0,0 +1,218 @@ +name: "bedtools_genomecov" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity." + info: null + required: false + choices: + - "unstranded" + - "forward" + - "reverse" + - "auto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam" + description: "Genome BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_bedtools_args" + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--bedgraph_forward" + info: null + default: + - "$id.forward.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_reverse" + info: null + default: + - "$id.reverse.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compute BEDGRAPH (-bg) summaries of feature coverage" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "chr19.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/bedtools_genomecov.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential wget && \\\nwget --no-check-certificate\ + \ https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static\ + \ && \\\nmv bedtools.static /usr/local/bin/bedtools && \\\nchmod a+x /usr/local/bin/bedtools\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bedtools_genomecov/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/bedtools_genomecov" + executable: "target/nextflow/bedtools_genomecov/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/bedtools_genomecov/main.nf b/target/nextflow/bedtools_genomecov/main.nf new file mode 100644 index 0000000..a4adfda --- /dev/null +++ b/target/nextflow/bedtools_genomecov/main.nf @@ -0,0 +1,3876 @@ +// bedtools_genomecov v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "bedtools_genomecov", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity.", + "required" : false, + "choices" : [ + "unstranded", + "forward", + "reverse", + "auto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam", + "description" : "Genome BAM file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_bedtools_args", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--bedgraph_forward", + "default" : [ + "$id.forward.bedgraph" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bedgraph_reverse", + "default" : [ + "$id.reverse.bedgraph" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Compute BEDGRAPH (-bg) summaries of feature coverage", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/chr19.bam" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/bedtools_genomecov.nf" + ], + "last_sha" : "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y build-essential wget && \\\\\nwget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \\\\\nmv bedtools.static /usr/local/bin/bedtools && \\\\\nchmod a+x /usr/local/bin/bedtools\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/bedtools_genomecov/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/bedtools_genomecov", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_BEDTOOLS_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_BEDTOOLS_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_bedtools_args='&'#" ; else echo "# par_extra_bedtools_args="; fi ) +$( if [ ! -z ${VIASH_PAR_BEDGRAPH_FORWARD+x} ]; then echo "${VIASH_PAR_BEDGRAPH_FORWARD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bedgraph_forward='&'#" ; else echo "# par_bedgraph_forward="; fi ) +$( if [ ! -z ${VIASH_PAR_BEDGRAPH_REVERSE+x} ]; then echo "${VIASH_PAR_BEDGRAPH_REVERSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bedgraph_reverse='&'#" ; else echo "# par_bedgraph_reverse="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix_forward="forward" +prefix_reverse="reverse" +if [ \\$par_strandedness == 'reverse' ]; then + prefix_forward="reverse" + prefix_reverse="forward" +fi + +bedtools genomecov \\\\ + -ibam \\$par_bam \\\\ + -bg \\\\ + -strand + \\\\ + \\$par_extra_bedtools_args | bedtools sort > \\$prefix_forward.bedGraph + +bedtools genomecov \\\\ + -ibam \\$par_bam \\\\ + -bg \\\\ + -strand - \\\\ + \\$par_extra_bedtools_args | bedtools sort > \\$prefix_reverse.bedGraph + +mv \\$prefix_forward.bedGraph \\$par_bedgraph_forward +mv \\$prefix_reverse.bedGraph \\$par_bedgraph_reverse +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/bedtools_genomecov", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/bedtools_genomecov/nextflow.config b/target/nextflow/bedtools_genomecov/nextflow.config new file mode 100644 index 0000000..3ee7c34 --- /dev/null +++ b/target/nextflow/bedtools_genomecov/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'bedtools_genomecov' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Compute BEDGRAPH (-bg) summaries of feature coverage' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/bedtools_genomecov/nextflow_labels.config b/target/nextflow/bedtools_genomecov/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/bedtools_genomecov/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/bedtools_genomecov/nextflow_schema.json b/target/nextflow/bedtools_genomecov/nextflow_schema.json new file mode 100644 index 0000000..4a4a0b3 --- /dev/null +++ b/target/nextflow/bedtools_genomecov/nextflow_schema.json @@ -0,0 +1,129 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bedtools_genomecov", +"description": "Compute BEDGRAPH (-bg) summaries of feature coverage", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "strandedness": { + "type": + "string", + "description": "Type: `string`, choices: ``unstranded`, `forward`, `reverse`, `auto``. Sample strand-specificity", + "help_text": "Type: `string`, choices: ``unstranded`, `forward`, `reverse`, `auto``. Sample strand-specificity.", + "enum": ["unstranded", "forward", "reverse", "auto"] + + + } + + + , + "bam": { + "type": + "string", + "description": "Type: `file`. Genome BAM file", + "help_text": "Type: `file`. Genome BAM file" + + } + + + , + "extra_bedtools_args": { + "type": + "string", + "description": "Type: `string`, default: ``. ", + "help_text": "Type: `string`, default: ``. " + , + "default":"" + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "bedgraph_forward": { + "type": + "string", + "description": "Type: `file`, default: `$id.forward.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.forward.bedgraph`. " + , + "default":"$id.forward.bedgraph" + } + + + , + "bedgraph_reverse": { + "type": + "string", + "description": "Type: `file`, default: `$id.reverse.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.reverse.bedgraph`. " + , + "default":"$id.reverse.bedgraph" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/cat_additional_fasta/.config.vsh.yaml b/target/nextflow/cat_additional_fasta/.config.vsh.yaml new file mode 100644 index 0000000..c3dd55b --- /dev/null +++ b/target/nextflow/cat_additional_fasta/.config.vsh.yaml @@ -0,0 +1,222 @@ +name: "cat_additional_fasta" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--additional_fasta" + description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\ + \ sequences." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--biotype" + description: "Biotype value to use when appending entries to GTF file when additional\ + \ fasta file is provided." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--fasta_output" + description: "Concatenated FASTA file." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf_output" + description: "Concatenated GTF file." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Concatenate addional fasta file to reference FASTA and GTF files.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genome.fasta" +- type: "file" + path: "genes.gtf.gz" +- type: "file" + path: "gfp.fa.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/cat_additional_fasta.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/cat_additional_fasta/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/cat_additional_fasta" + executable: "target/nextflow/cat_additional_fasta/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/cat_additional_fasta/main.nf b/target/nextflow/cat_additional_fasta/main.nf new file mode 100644 index 0000000..6ce4ebc --- /dev/null +++ b/target/nextflow/cat_additional_fasta/main.nf @@ -0,0 +1,3927 @@ +// cat_additional_fasta v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "cat_additional_fasta", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--fasta", + "description" : "Path to FASTA genome file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "Path to GTF annotation file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--additional_fasta", + "description" : "FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--biotype", + "description" : "Biotype value to use when appending entries to GTF file when additional fasta file is provided.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--fasta_output", + "description" : "Concatenated FASTA file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf_output", + "description" : "Concatenated GTF file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Concatenate addional fasta file to reference FASTA and GTF files.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/genome.fasta" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/genes.gtf.gz" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/gfp.fa.gz" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/cat_additional_fasta.nf" + ], + "last_sha" : "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/cat_additional_fasta/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/cat_additional_fasta", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.py" +cat > "$tempscript" << VIASHMAIN +#!/usr/bin/env python3 +""" +Read a custom fasta file and create a custom GTF containing each entry +""" +from itertools import groupby +import logging +import os +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'fasta': $( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "r'${VIASH_PAR_FASTA//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'gtf': $( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "r'${VIASH_PAR_GTF//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'additional_fasta': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_FASTA+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_FASTA//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'biotype': $( if [ ! -z ${VIASH_PAR_BIOTYPE+x} ]; then echo "r'${VIASH_PAR_BIOTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'fasta_output': $( if [ ! -z ${VIASH_PAR_FASTA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_FASTA_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'gtf_output': $( if [ ! -z ${VIASH_PAR_GTF_OUTPUT+x} ]; then echo "r'${VIASH_PAR_GTF_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END + +def fasta_iter(fasta_name): + """ + modified from Brent Pedersen + Correct Way To Parse A Fasta File In Python + given a fasta file. yield tuples of header, sequence + + Fasta iterator from https://www.biostars.org/p/710/#120760 + """ + with open(fasta_name) as fh: + # ditch the boolean (x[0]) and just keep the header or sequence since + # we know they alternate. + faiter = (x[1] for x in groupby(fh, lambda line: line[0] == ">")) + for header in faiter: + # drop the ">" + headerStr = header.__next__()[1:].strip() + # join all sequence lines to one. + seq = "".join(s.strip() for s in faiter.__next__()) + yield (headerStr, seq) + +def fasta2gtf(fasta, output, biotype): + fiter = fasta_iter(fasta) + # GTF output lines + lines = [] + attributes = 'exon_id "{name}.1"; exon_number "1";{biotype} gene_id "{name}_gene"; gene_name "{name}_gene"; gene_source "custom"; transcript_id "{name}_gene"; transcript_name "{name}_gene";\\\\n' + line_template = "{name}\\\\ttransgene\\\\texon\\\\t1\\\\t{length}\\\\t.\\\\t+\\\\t.\\\\t" + attributes + for ff in fiter: + name, seq = ff + # Use first ID as separated by spaces as the "sequence name" + # (equivalent to "chromosome" in other cases) + seqname = name.split()[0] + # Remove all spaces + name = seqname.replace(" ", "_") + length = len(seq) + biotype_attr = "" + if biotype: + biotype_attr = f' {biotype} "transgene";' + line = line_template.format(name=name, length=length, biotype=biotype_attr) + lines.append(line) + with open(output, "w") as f: + f.write("".join(lines)) + +add_name = os.path.basename(par['additional_fasta']) +output = os.path.splitext(add_name)[0] + ".gtf" +fasta2gtf(par['additional_fasta'], output, par['biotype']) + +with open(par['fasta'], 'r') as f1: + content1 = f1.read() +with open(par['additional_fasta'], 'r') as f2: + content2 = f2.read() +with open(par['fasta_output'], 'w') as f_out: + f_out.write(content1 + content2) +with open(par['gtf'], 'r') as g1: + g_content1 = g1.read() +with open(output, 'r') as g2: + g_content2 = g2.read() +with open(par['gtf_output'], 'w') as g_out: + g_out.write(g_content1 + g_content2) +VIASHMAIN +python -B "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/cat_additional_fasta", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/cat_additional_fasta/nextflow.config b/target/nextflow/cat_additional_fasta/nextflow.config new file mode 100644 index 0000000..b0c98ac --- /dev/null +++ b/target/nextflow/cat_additional_fasta/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'cat_additional_fasta' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Concatenate addional fasta file to reference FASTA and GTF files.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/cat_additional_fasta/nextflow_labels.config b/target/nextflow/cat_additional_fasta/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/cat_additional_fasta/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/cat_additional_fasta/nextflow_schema.json b/target/nextflow/cat_additional_fasta/nextflow_schema.json new file mode 100644 index 0000000..7eef229 --- /dev/null +++ b/target/nextflow/cat_additional_fasta/nextflow_schema.json @@ -0,0 +1,136 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cat_additional_fasta", +"description": "Concatenate addional fasta file to reference FASTA and GTF files.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "fasta": { + "type": + "string", + "description": "Type: `file`, required. Path to FASTA genome file", + "help_text": "Type: `file`, required. Path to FASTA genome file." + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. Path to GTF annotation file", + "help_text": "Type: `file`. Path to GTF annotation file." + + } + + + , + "additional_fasta": { + "type": + "string", + "description": "Type: `file`. FASTA file to concatenate to genome FASTA file e", + "help_text": "Type: `file`. FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences." + + } + + + , + "biotype": { + "type": + "string", + "description": "Type: `string`. Biotype value to use when appending entries to GTF file when additional fasta file is provided", + "help_text": "Type: `string`. Biotype value to use when appending entries to GTF file when additional fasta file is provided." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "fasta_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.fasta_output`. Concatenated FASTA file", + "help_text": "Type: `file`, default: `$id.$key.fasta_output`. Concatenated FASTA file." + , + "default":"$id.$key.fasta_output" + } + + + , + "gtf_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.gtf_output`. Concatenated GTF file", + "help_text": "Type: `file`, default: `$id.$key.gtf_output`. Concatenated GTF file." + , + "default":"$id.$key.gtf_output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/cat_fastq/.config.vsh.yaml b/target/nextflow/cat_fastq/.config.vsh.yaml new file mode 100644 index 0000000..6fe72e6 --- /dev/null +++ b/target/nextflow/cat_fastq/.config.vsh.yaml @@ -0,0 +1,209 @@ +name: "cat_fastq" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--read_1" + description: "Read 1 fastq files to be concatenated" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--read_2" + description: "Read 2 fastq files to be concatenated" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--fastq_1" + description: "Concatenated read 1 fastq" + info: null + default: + - "$id_r1.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Concatenated read 2 fastq" + info: null + default: + - "$id_r2.fastq" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Concatenate multiple fastq files" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "SRR6357070_1.fastq.gz" +- type: "file" + path: "SRR6357071_1.fastq.gz" +- type: "file" + path: "SRR6357070_2.fastq.gz" +- type: "file" + path: "SRR6357071_2.fastq.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/cat/fastq/main.nf" + - "modules/nf-core/cat/fastq/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/cat_fastq/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/cat_fastq" + executable: "target/nextflow/cat_fastq/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/cat_fastq/main.nf b/target/nextflow/cat_fastq/main.nf new file mode 100644 index 0000000..1da9c41 --- /dev/null +++ b/target/nextflow/cat_fastq/main.nf @@ -0,0 +1,3862 @@ +// cat_fastq v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "cat_fastq", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--read_1", + "description" : "Read 1 fastq files to be concatenated", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_2", + "description" : "Read 2 fastq files to be concatenated", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--fastq_1", + "description" : "Concatenated read 1 fastq", + "default" : [ + "$id_r1.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_2", + "description" : "Concatenated read 2 fastq", + "default" : [ + "$id_r2.fastq" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Concatenate multiple fastq files", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/input_fastq/SRR6357071_1.fastq.gz" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/input_fastq/SRR6357071_2.fastq.gz" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/cat/fastq/main.nf", + "modules/nf-core/cat/fastq/meta.yml" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/cat_fastq/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/cat_fastq", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_READ_1+x} ]; then echo "${VIASH_PAR_READ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_1='&'#" ; else echo "# par_read_1="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_2+x} ]; then echo "${VIASH_PAR_READ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_2='&'#" ; else echo "# par_read_2="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +IFS=";" read -ra read_1 <<< \\$par_read_1 +IFS=";" read -ra read_2 <<< \\$par_read_2 + +filename=\\$(basename -- "\\${read_1[0]}") +if [ \\${filename##*.} == "gz" ]; then + command="zcat" +else + command="cat" +fi + +if [ \\${#read_1[@]} -gt 0 ]; then + \\$command \\${read_1[*]} > \\$par_fastq_1 +fi +if [ \\${#read_2[@]} -gt 0 ]; then + \\$command \\${read_2[*]} > \\$par_fastq_2 +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/cat_fastq", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/cat_fastq/nextflow.config b/target/nextflow/cat_fastq/nextflow.config new file mode 100644 index 0000000..de39a51 --- /dev/null +++ b/target/nextflow/cat_fastq/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'cat_fastq' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Concatenate multiple fastq files' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/cat_fastq/nextflow_labels.config b/target/nextflow/cat_fastq/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/cat_fastq/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/cat_fastq/nextflow_schema.json b/target/nextflow/cat_fastq/nextflow_schema.json new file mode 100644 index 0000000..6698d61 --- /dev/null +++ b/target/nextflow/cat_fastq/nextflow_schema.json @@ -0,0 +1,116 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cat_fastq", +"description": "Concatenate multiple fastq files", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "read_1": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Read 1 fastq files to be concatenated", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Read 1 fastq files to be concatenated" + + } + + + , + "read_2": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Read 2 fastq files to be concatenated", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Read 2 fastq files to be concatenated" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "fastq_1": { + "type": + "string", + "description": "Type: `file`, default: `$id_r1.fastq`. Concatenated read 1 fastq", + "help_text": "Type: `file`, default: `$id_r1.fastq`. Concatenated read 1 fastq" + , + "default":"$id_r1.fastq" + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: `file`, default: `$id_r2.fastq`. Concatenated read 2 fastq", + "help_text": "Type: `file`, default: `$id_r2.fastq`. Concatenated read 2 fastq" + , + "default":"$id_r2.fastq" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/copy_if_exists/.config.vsh.yaml b/target/nextflow/copy_if_exists/.config.vsh.yaml new file mode 100644 index 0000000..1d6d1c3 --- /dev/null +++ b/target/nextflow/copy_if_exists/.config.vsh.yaml @@ -0,0 +1,183 @@ +name: "copy_if_exists" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--required_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/required_file.txt" + must_exist: false + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--optional_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/optional_file.txt" + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "copy_if_exists_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "required_file.txt" +- type: "file" + path: "optional_file.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/extra/copy_if_exists/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/copy_if_exists" + executable: "target/nextflow/copy_if_exists/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/copy_if_exists/main.nf b/target/nextflow/copy_if_exists/main.nf new file mode 100644 index 0000000..b9ab77e --- /dev/null +++ b/target/nextflow/copy_if_exists/main.nf @@ -0,0 +1,3829 @@ +// copy_if_exists v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "copy_if_exists", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--required_file", + "example" : [ + "/tmp/rnaseq_workflow_config/required_file.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--optional_file", + "example" : [ + "/tmp/rnaseq_workflow_config/optional_file.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Ouput", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "default" : [ + "copy_if_exists_output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/assets/required_file.txt" + }, + { + "type" : "file", + "path" : "/src/assets/optional_file.txt" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/extra/copy_if_exists/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/copy_if_exists", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_REQUIRED_FILE+x} ]; then echo "${VIASH_PAR_REQUIRED_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_required_file='&'#" ; else echo "# par_required_file="; fi ) +$( if [ ! -z ${VIASH_PAR_OPTIONAL_FILE+x} ]; then echo "${VIASH_PAR_OPTIONAL_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_optional_file='&'#" ; else echo "# par_optional_file="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \\$par_output + +# This file is checked by the Nextflow module wrapper +cp \\$par_required_file "\\$par_output" + +# If the variable is empty, we use the default one (registered as a resource) +if [ -z \\$par_optional_file ]; then + echo "No optional_file provided, using the default" + cp \\$meta_resources_dir/optional_file.txt "\\$par_output" +else + echo "Optional file provided" + if [ -f \\$par_optional_file ]; then + cp \\$par_optional_file "\\$par_output" + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/copy_if_exists", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/copy_if_exists/nextflow.config b/target/nextflow/copy_if_exists/nextflow.config new file mode 100644 index 0000000..5ddaef3 --- /dev/null +++ b/target/nextflow/copy_if_exists/nextflow.config @@ -0,0 +1,124 @@ +manifest { + name = 'copy_if_exists' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/copy_if_exists/nextflow_labels.config b/target/nextflow/copy_if_exists/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/copy_if_exists/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/copy_if_exists/nextflow_schema.json b/target/nextflow/copy_if_exists/nextflow_schema.json new file mode 100644 index 0000000..ce16f25 --- /dev/null +++ b/target/nextflow/copy_if_exists/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "copy_if_exists", +"description": "No description", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "required_file": { + "type": + "string", + "description": "Type: `file`, required, example: `/tmp/rnaseq_workflow_config/required_file.txt`. ", + "help_text": "Type: `file`, required, example: `/tmp/rnaseq_workflow_config/required_file.txt`. " + + } + + + , + "optional_file": { + "type": + "string", + "description": "Type: `file`, example: `/tmp/rnaseq_workflow_config/optional_file.txt`. ", + "help_text": "Type: `file`, example: `/tmp/rnaseq_workflow_config/optional_file.txt`. " + + } + + +} +}, + + + "ouput" : { + "title": "Ouput", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `copy_if_exists_output`. ", + "help_text": "Type: `file`, default: `copy_if_exists_output`. " + , + "default":"copy_if_exists_output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/ouput" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/copy_if_exists/optional_file.txt b/target/nextflow/copy_if_exists/optional_file.txt new file mode 100644 index 0000000..6462a73 --- /dev/null +++ b/target/nextflow/copy_if_exists/optional_file.txt @@ -0,0 +1 @@ +Optional! diff --git a/target/nextflow/copy_if_exists/required_file.txt b/target/nextflow/copy_if_exists/required_file.txt new file mode 100644 index 0000000..b4dbbb5 --- /dev/null +++ b/target/nextflow/copy_if_exists/required_file.txt @@ -0,0 +1 @@ +Required! diff --git a/target/nextflow/deseq2_qc/.config.vsh.yaml b/target/nextflow/deseq2_qc/.config.vsh.yaml new file mode 100644 index 0000000..9519ecc --- /dev/null +++ b/target/nextflow/deseq2_qc/.config.vsh.yaml @@ -0,0 +1,287 @@ +name: "deseq2_qc" +version: "v0.3.0" +argument_groups: +- name: "input" + arguments: + - type: "file" + name: "--counts" + description: "Count file matrix where rows are genes and columns are samples." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--vst" + description: "Use vst transformation instead of rlog with .DESeq2" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--count_col" + description: "First column containing sample count data." + info: null + default: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--id_col" + description: "Column containing identifiers to be used." + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sample_suffix" + description: "Suffix to remove after sample name in columns e.g. '.rmDup.bam'\ + \ if 'DRUG_R1.rmDup.bam'." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outprefix" + description: "Output prefix" + info: null + default: + - "deseq2" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--label" + description: "Label to used in MultiQC report" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--outdir" + info: null + default: + - "deseq2" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pca_multiqc" + info: null + default: + - "deseq2.pca.vals_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_dists_multiqc" + info: null + default: + - "deseq2.sample.dists_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "r_script" + path: "script.r" + is_executable: true +- type: "file" + path: "deseq2_pca_header.txt" +- type: "file" + path: "deseq2_clustering_header.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "run deseq2, perform pca, generate heatmaps and scatterplots for samples\ + \ in the counts files\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "counts.tsv" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/deseq2_qc.nf" + last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:latest" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "libcurl4-openssl-dev" + - "r-base" + - "r-base-core" + - "libxml2-dev" + - "procps" + - "libssl-dev" + interactive: false + - type: "r" + cran: + - "optparse" + - "ggplot2" + - "RColorBrewer" + - "pheatmap" + - "stringr" + - "matrixStats" + bioc: + - "DESeq2" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/deseq2_qc/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/deseq2_qc" + executable: "target/nextflow/deseq2_qc/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/deseq2_qc/deseq2_clustering_header.txt b/target/nextflow/deseq2_qc/deseq2_clustering_header.txt new file mode 100644 index 0000000..04e10ef --- /dev/null +++ b/target/nextflow/deseq2_qc/deseq2_clustering_header.txt @@ -0,0 +1,12 @@ +#id: 'deseq2_clustering' +#section_name: 'DESeq2 sample similarity' +#description: "is generated from clustering by Euclidean distances between +# DESeq2 +# rlog values for each sample +# in the deseq2_qc.r script." +#plot_type: 'heatmap' +#anchor: 'deseq2_clustering' +#pconfig: +# title: 'DESeq2: Heatmap of the sample-to-sample distances' +# xlab: True +# reverseColors: True diff --git a/target/nextflow/deseq2_qc/deseq2_pca_header.txt b/target/nextflow/deseq2_qc/deseq2_pca_header.txt new file mode 100644 index 0000000..2599732 --- /dev/null +++ b/target/nextflow/deseq2_qc/deseq2_pca_header.txt @@ -0,0 +1,11 @@ +#id: 'deseq2_pca' +#section_name: 'DESeq2 PCA plot' +#description: "PCA plot between samples in the experiment. +# These values are calculated using DESeq2 +# in the deseq2_qc.r script." +#plot_type: 'scatter' +#anchor: 'deseq2_pca' +#pconfig: +# title: 'DESeq2: Principal component plot' +# xlab: PC1 +# ylab: PC2 diff --git a/target/nextflow/deseq2_qc/main.nf b/target/nextflow/deseq2_qc/main.nf new file mode 100644 index 0000000..f46d116 --- /dev/null +++ b/target/nextflow/deseq2_qc/main.nf @@ -0,0 +1,4173 @@ +// deseq2_qc v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "deseq2_qc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "input", + "arguments" : [ + { + "type" : "file", + "name" : "--counts", + "description" : "Count file matrix where rows are genes and columns are samples.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--vst", + "description" : "Use vst transformation instead of rlog with .DESeq2", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--count_col", + "description" : "First column containing sample count data.", + "default" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--id_col", + "description" : "Column containing identifiers to be used.", + "default" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sample_suffix", + "description" : "Suffix to remove after sample name in columns e.g. '.rmDup.bam' if 'DRUG_R1.rmDup.bam'.", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outprefix", + "description" : "Output prefix", + "default" : [ + "deseq2" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--label", + "description" : "Label to used in MultiQC report", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--outdir", + "default" : [ + "deseq2" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pca_multiqc", + "default" : [ + "deseq2.pca.vals_mqc.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_dists_multiqc", + "default" : [ + "deseq2.sample.dists_mqc.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "r_script", + "path" : "script.r", + "is_executable" : true + }, + { + "type" : "file", + "path" : "deseq2_pca_header.txt" + }, + { + "type" : "file", + "path" : "deseq2_clustering_header.txt" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "run deseq2, perform pca, generate heatmaps and scatterplots for samples in the counts files\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/counts.tsv" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/deseq2_qc.nf" + ], + "last_sha" : "92b2a7857de1dda9d1c19a088941fc81e2976ff7" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:latest", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libcurl4-openssl-dev", + "r-base", + "r-base-core", + "libxml2-dev", + "procps", + "libssl-dev" + ], + "interactive" : false + }, + { + "type" : "r", + "cran" : [ + "optparse", + "ggplot2", + "RColorBrewer", + "pheatmap", + "stringr", + "matrixStats" + ], + "bioc" : [ + "DESeq2" + ], + "bioc_force_install" : false, + "warnings_as_errors" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/deseq2_qc/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/deseq2_qc", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.R" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "counts" = $( if [ ! -z ${VIASH_PAR_COUNTS+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_COUNTS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "vst" = $( if [ ! -z ${VIASH_PAR_VST+x} ]; then echo -n "as.logical(toupper('"; echo -n "$VIASH_PAR_VST" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'))"; else echo NULL; fi ), + "count_col" = $( if [ ! -z ${VIASH_PAR_COUNT_COL+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_COUNT_COL" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "id_col" = $( if [ ! -z ${VIASH_PAR_ID_COL+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_ID_COL" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "sample_suffix" = $( if [ ! -z ${VIASH_PAR_SAMPLE_SUFFIX+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_SAMPLE_SUFFIX" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "outprefix" = $( if [ ! -z ${VIASH_PAR_OUTPREFIX+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPREFIX" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "label" = $( if [ ! -z ${VIASH_PAR_LABEL+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_LABEL" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "outdir" = $( if [ ! -z ${VIASH_PAR_OUTDIR+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTDIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "pca_multiqc" = $( if [ ! -z ${VIASH_PAR_PCA_MULTIQC+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PCA_MULTIQC" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "sample_dists_multiqc" = $( if [ ! -z ${VIASH_PAR_SAMPLE_DISTS_MULTIQC+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_SAMPLE_DISTS_MULTIQC" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "name" = $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kib" = $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mib" = $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gib" = $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tib" = $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pib" = $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) +) +dep <- list( + +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +# REQUIREMENTS + +## PCA, HEATMAP AND SCATTERPLOTS FOR SAMPLES IN COUNTS FILE +## - SAMPLE NAMES HAVE TO END IN e.g. "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS. + +# LOAD LIBRARIES +library(DESeq2) +library(ggplot2) +library(RColorBrewer) +library(pheatmap) +library(stringr) + +if (file.exists(par\\$outdir) == FALSE) { + dir.create(par\\$outdir, recursive = TRUE) +} + +# READ IN COUNTS FILE +count_table <- read.delim(file = par\\$counts, header = TRUE, row.names = NULL) +rownames(count_table) <- count_table[, par\\$id_col] +count_table <- count_table[, par\\$count_col:ncol(count_table), drop = FALSE] +colnames(count_table) <- gsub(par\\$sample_suffix, "", colnames(count_table)) +colnames(count_table) <- gsub(pattern = '\\\\\\\\.\\$', replacement = '', colnames(count_table)) + +# RUN DESEQ2 +samples_vec <- colnames(count_table) +name_components <- strsplit(samples_vec, "_") +n_components <- length(name_components[[1]]) +decompose <- n_components != 1 && all(sapply(name_components, length) == n_components) +coldata <- data.frame(samples_vec, sample = samples_vec, row.names = 1) +if (decompose) { + groupings <- as.data.frame(lapply(1:n_components, function(i) sapply(name_components, "[[", i))) + n_distinct <- sapply(groupings, function(grp) length(unique(grp))) + groupings <- groupings[n_distinct != 1 & n_distinct != length(samples_vec)] + if (ncol(groupings) != 0) { + names(groupings) <- paste0("Group", 1:ncol(groupings)) + coldata <- cbind(coldata, groupings) + } else { + decompose <- FALSE + } +} + +DDSFile <- paste(par\\$outdir, "/", par\\$outprefix, ".dds.RData", sep = "") + +counts <- count_table[, samples_vec, drop = FALSE] +dds <- DESeqDataSetFromMatrix(countData = round(counts), colData = coldata, design = ~1) +dds <- estimateSizeFactors(dds) + +# No point if only one sample, or one gene +if (min(dim(count_table)) <= 1) { + save(dds, file = DDSFile) + saveRDS(dds, file = sub("\\\\\\\\.dds\\\\\\\\.RData\\$", ".rds", DDSFile)) + warning("Not enough samples or genes in counts file for PCA.", call. = FALSE) + quit(save = "no", status = 0, runLast = FALSE) +} + +if (!par\\$vst) { + vst_name <- "rlog" + rld <- rlog(dds) +} else { + vst_name <- "vst" + rld <- varianceStabilizingTransformation(dds) +} + +assay(dds, vst_name) <- assay(rld) +save(dds, file = DDSFile) +saveRDS(dds, file = sub("\\\\\\\\.dds\\\\\\\\.RData\\$", ".rds", DDSFile)) + +# PLOT QC + +##' PCA pre-processeor +##' +##' Generate all the necessary information to plot PCA from a DESeq2 object +##' in which an assay containing a variance-stabilised matrix of counts is +##' stored. Copied from DESeq2::plotPCA, but with additional ability to +##' say which assay to run the PCA on. +##' +##' @param object The DESeq2DataSet object. +##' @param ntop number of top genes to use for principla components, selected by highest row variance. +##' @param assay the name or index of the assay that stores the variance-stabilised data. +##' @return A data.frame containing the projected data alongside the grouping columns. +##' A 'percentVar' attribute is set which includes the percentage of variation each PC explains, +##' and additionally how much the variation within that PC is explained by the grouping variable. +##' @author Gavin Kelly + +plotPCA_vst <- function(object, ntop = 500, assay = length(assays(object))) { + rv <- rowVars(assay(object, assay), useNames = TRUE) + select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] + pca <- prcomp(t(assay(object, assay)[select, ]), center = TRUE, scale = FALSE) + percentVar <- pca\\$sdev^2 / sum(pca\\$sdev^2) + df <- cbind(as.data.frame(colData(object)), pca\\$x) + # Order points so extreme samples are more likely to get label + ord <- order(abs(rank(df\\$PC1) - median(df\\$PC1)), abs(rank(df\\$PC2) - median(df\\$PC2))) + df <- df[ord, ] + attr(df, "percentVar") <- data.frame(PC = seq(along = percentVar), percentVar = 100 * percentVar) + return(df) +} + +PlotFile <- paste(par\\$outdir, "/", par\\$outprefix, ".plots.pdf", sep = "") + +pdf(file = PlotFile, onefile = TRUE, width = 7, height = 7) + +## PCA +ntop <- c(500, Inf) +for (n_top_var in ntop) { + pca_data <- plotPCA_vst(dds, assay = vst_name, ntop = n_top_var) + percentVar <- round(attr(pca_data, "percentVar")\\$percentVar) + plot_subtitle <- ifelse(n_top_var == Inf, "All genes", paste("Top", n_top_var, "genes")) + pl <- ggplot(pca_data, aes(PC1, PC2, label = paste0(" ", sample, " "))) + + geom_point() + + geom_text(check_overlap = TRUE, vjust = 0.5, hjust="inward") + + xlab(paste0("PC1: ", percentVar[1], "% variance")) + + ylab(paste0("PC2: ", percentVar[2], "% variance")) + + labs(title = paste0("First PCs on ", vst_name, "-transformed data"), subtitle = plot_subtitle) + + theme(legend.position = "top", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + panel.border = element_rect(colour = "black", fill = NA, size = 1)) + print(pl) + + if (decompose) { + pc_names <- paste0("PC", attr(pca_data, "percentVar")\\$PC) + long_pc <- reshape(pca_data, varying=pc_names, direction="long", sep="", timevar="component", idvar="pcrow") + long_pc <- subset(long_pc, component <= 5) + long_pc_grp <- reshape(long_pc, varying = names(groupings), direction = "long", sep = "", timevar = "grouper") + long_pc_grp <- subset(long_pc_grp, grouper <= 5) + long_pc_grp\\$component <- paste("PC", long_pc_grp\\$component) + long_pc_grp\\$grouper <- paste0(long_pc_grp\\$grouper, c("st", "nd", "rd", "th", "th")[long_pc_grp\\$grouper], " prefix") + pl <- ggplot(long_pc_grp, aes(x = Group, y = PC)) + + geom_point() + + stat_summary(fun = mean, geom = "line", aes(group = 1)) + + labs(x = NULL, y = NULL, subtitle = plot_subtitle, title = "PCs split by sample-name prefixes") + + facet_grid(component ~ grouper, scales = "free_x") + + scale_x_discrete(guide = guide_axis(n.dodge = 3)) + print(pl) + } +} # at end of loop, we'll be using the user-defined ntop if any, else all genes + +## WRITE PC1 vs PC2 VALUES TO FILE +pca_vals <- pca_data[, c("PC1", "PC2")] +colnames(pca_vals) <- paste0(colnames(pca_vals), ": ", percentVar[1:2], '% variance') +pca_vals <- cbind(sample = rownames(pca_vals), pca_vals) +pca_vals_file <- paste(par\\$outdir, "/", par\\$outprefix, ".pca_vals.txt", sep = "") +write.table(pca_vals, file = pca_vals_file, + row.names = FALSE, col.names = TRUE, + sep = "\\\\t", quote = TRUE) + +## SAMPLE CORRELATION HEATMAP +sampleDists <- dist(t(assay(dds, vst_name))) +sampleDistMatrix <- as.matrix(sampleDists) +colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255) +pheatmap( + sampleDistMatrix, + clustering_distance_rows = sampleDists, + clustering_distance_cols = sampleDists, + col = colors, + main = paste("Euclidean distance between", vst_name, "of samples") +) + +## WRITE SAMPLE DISTANCES TO FILE +sample_dist_file <- paste(par\\$outdir, "/", par\\$outprefix, ".sample.dists.txt", sep = "") +write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix), + file = sample_dist_file, row.names = FALSE, + col.names = TRUE, sep = "\\\\t", quote = FALSE) +dev.off() + +# SAVE SIZE FACTORS +SizeFactorsDir <- paste0(par\\$outdir, "/size_factors/") +if (file.exists(SizeFactorsDir) == FALSE) { + dir.create(SizeFactorsDir, recursive = TRUE) +} + +NormFactorsFile <- paste(SizeFactorsDir, par\\$outprefix, ".size_factors.RData", sep = "") + +normFactors <- sizeFactors(dds) +save(normFactors, file = NormFactorsFile) + +for (name in names(sizeFactors(dds))) { + sizeFactorFile <- paste(SizeFactorsDir, name, ".txt", sep = "") + write(as.numeric(sizeFactors(dds)[name]), file = sizeFactorFile) +} + +# R SESSION INFO +RLogFile <- "R_sessionInfo.log" + +sink(RLogFile) +a <- sessionInfo() +print(a) +sink() + +# Prepare files for MultiQC + +readLines(paste0(meta\\$resources_dir, "/deseq2_pca_header.txt")) |> + stringr::str_replace(pattern = "#id: 'deseq2_pca'", + replace = paste0("#id: '", par\\$label, "_deseq2_pca'")) |> + writeLines(con = "tmp.txt") + +readLines(paste0("tmp.txt")) |> + stringr::str_replace(pattern = "#section_name: 'DESeq2 PCA plot'", + replace = paste0("#section_name: 'DESeq2 PCA plot - '", par\\$label)) |> + writeLines(con = "tmp.txt") + +system2("cat", args = paste0("tmp.txt ", pca_vals_file), stdout = par\\$pca_multiqc) + +readLines(paste0(meta\\$resources_dir, "/deseq2_clustering_header.txt")) |> + stringr::str_replace(pattern = "#id: 'deseq2_clustering'", + replace = paste0("#id: '", par\\$label, "_deseq2_clustering'")) |> + writeLines(con = "tmp.txt") + +readLines(paste0("tmp.txt")) |> + stringr::str_replace(pattern = "#section_name: 'DESeq2 sample similarity'", + replace = paste0("#section_name: 'DESeq2 sample similarity - '", par\\$label)) |> + writeLines(con = "tmp.txt") + +system2("cat", args = paste0("tmp.txt ", sample_dist_file), stdout = par\\$sample_dists_multiqc) +VIASHMAIN +Rscript "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/deseq2_qc", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/deseq2_qc/nextflow.config b/target/nextflow/deseq2_qc/nextflow.config new file mode 100644 index 0000000..d6e414d --- /dev/null +++ b/target/nextflow/deseq2_qc/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'deseq2_qc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'run deseq2, perform pca, generate heatmaps and scatterplots for samples in the counts files\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/deseq2_qc/nextflow_labels.config b/target/nextflow/deseq2_qc/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/deseq2_qc/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/deseq2_qc/nextflow_schema.json b/target/nextflow/deseq2_qc/nextflow_schema.json new file mode 100644 index 0000000..ab77840 --- /dev/null +++ b/target/nextflow/deseq2_qc/nextflow_schema.json @@ -0,0 +1,182 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "deseq2_qc", +"description": "run deseq2, perform pca, generate heatmaps and scatterplots for samples in the counts files\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "input", + "type": "object", + "description": "No description", + "properties": { + + + "counts": { + "type": + "string", + "description": "Type: `file`, required. Count file matrix where rows are genes and columns are samples", + "help_text": "Type: `file`, required. Count file matrix where rows are genes and columns are samples." + + } + + + , + "vst": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Use vst transformation instead of rlog with ", + "help_text": "Type: `boolean`, default: `false`. Use vst transformation instead of rlog with .DESeq2" + , + "default":false + } + + + , + "count_col": { + "type": + "integer", + "description": "Type: `integer`, default: `3`. First column containing sample count data", + "help_text": "Type: `integer`, default: `3`. First column containing sample count data." + , + "default":3 + } + + + , + "id_col": { + "type": + "integer", + "description": "Type: `integer`, default: `1`. Column containing identifiers to be used", + "help_text": "Type: `integer`, default: `1`. Column containing identifiers to be used." + , + "default":1 + } + + + , + "sample_suffix": { + "type": + "string", + "description": "Type: `string`, default: ``. Suffix to remove after sample name in columns e", + "help_text": "Type: `string`, default: ``. Suffix to remove after sample name in columns e.g. \u0027.rmDup.bam\u0027 if \u0027DRUG_R1.rmDup.bam\u0027." + , + "default":"" + } + + + , + "outprefix": { + "type": + "string", + "description": "Type: `string`, default: `deseq2`. Output prefix", + "help_text": "Type: `string`, default: `deseq2`. Output prefix" + , + "default":"deseq2" + } + + + , + "label": { + "type": + "string", + "description": "Type: `string`. Label to used in MultiQC report", + "help_text": "Type: `string`. Label to used in MultiQC report" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "outdir": { + "type": + "string", + "description": "Type: `file`, default: `deseq2`. ", + "help_text": "Type: `file`, default: `deseq2`. " + , + "default":"deseq2" + } + + + , + "pca_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `deseq2.pca.vals_mqc.tsv`. ", + "help_text": "Type: `file`, default: `deseq2.pca.vals_mqc.tsv`. " + , + "default":"deseq2.pca.vals_mqc.tsv" + } + + + , + "sample_dists_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `deseq2.sample.dists_mqc.tsv`. ", + "help_text": "Type: `file`, default: `deseq2.sample.dists_mqc.tsv`. " + , + "default":"deseq2.sample.dists_mqc.tsv" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/dupradar/.config.vsh.yaml b/target/nextflow/dupradar/.config.vsh.yaml new file mode 100644 index 0000000..9f7acc6 --- /dev/null +++ b/target/nextflow/dupradar/.config.vsh.yaml @@ -0,0 +1,307 @@ +name: "dupradar" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "Sample ID" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + description: "path to input alignment file in BAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf_annotation" + description: "path to GTF annotation file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "add flag if input alignment file consists of paired reads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "strandedness of input bam file reads (forward, reverse or unstranded\ + \ (default, applicable to paired reads))" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_dupmatrix" + description: "path to output file (txt) of duplicate tag counts" + info: null + default: + - "$id.dup_matrix.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_dup_intercept_mqc" + description: "path to output file (txt) of multiqc intercept value DupRadar" + info: null + default: + - "$id.dup_intercept_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duprate_exp_boxplot" + description: "path to output file (pdf) of distribution of expression box plot" + info: null + default: + - "$id.duprate_exp_boxplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duprate_exp_densplot" + description: "path to output file (pdf) of 2D density scatter plot of duplicate\ + \ tag counts" + info: null + default: + - "$id.duprate_exp_densityplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duprate_exp_denscurve_mqc" + description: "path to output file (pdf) of density curve of gene duplication multiqc" + info: null + default: + - "$id.duprate_exp_density_curve_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_expression_histogram" + description: "path to output file (pdf) of distribution of RPK values per gene\ + \ histogram" + info: null + default: + - "$id.expression_hist.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_intercept_slope" + description: "output file (txt) with progression of duplication rate value" + info: null + default: + - "$id.intercept_slope.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "dupradar.r" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Assessment of duplication rates in RNA-Seq datasets\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" +- type: "file" + path: "genes.gtf" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/dupradar.nf" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "r-base" + interactive: false + - type: "r" + bioc: + - "dupRadar" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/dupradar/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/dupradar" + executable: "target/nextflow/dupradar/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/dupradar/dupradar.r b/target/nextflow/dupradar/dupradar.r new file mode 100644 index 0000000..82218c6 --- /dev/null +++ b/target/nextflow/dupradar/dupradar.r @@ -0,0 +1,154 @@ +#!/usr/bin/env Rscript + +# Command line argument processing +args = commandArgs(trailingOnly=TRUE) +if (length(args) < 5) { + stop("Usage: dupRadar.r ", call.=FALSE) +} + +message("paired_end is", args[5]) +message("the type is is", class(args[5])) + +input_bam <- args[1] +output_prefix <- args[2] +annotation_gtf <- args[3] +stranded <- as.numeric(args[4]) +paired_end <- ifelse(args[5] == "true", TRUE, FALSE) +threads <- as.numeric(args[6]) + +bamRegex <- "(.+)\\.bam$" + +if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '' must be an existing file (not a directory) with '.bam' extension...") +if(!(file.exists(annotation_gtf) && (!file.info(annotation_gtf)$isdir))) stop("Third argument '' must be an existing file (and not a directory)...") +if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...") +if(is.na(threads) || (threads<=0)) stop("Fifth argument must be a strictly positive numeric value...") + +# Debug messages (stderr) +message("Input bam (Arg 1): ", input_bam) +message("Output basename(Arg 2): ", output_prefix) +message("Input gtf (Arg 3): ", annotation_gtf) +message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded+1]) +message("paired_end (Arg 5): ", paired_end) +message("Nb threads (Arg 6): ", threads) +message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not specified")) + + +# Load / install packages +if (length(args) > 5) { .libPaths( c( args[6], .libPaths() ) ) } +if (!require("dupRadar")){ + source("http://bioconductor.org/biocLite.R") + biocLite("dupRadar", suppressUpdates=TRUE) + library("dupRadar") +} +if (!require("parallel")) { + install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/') + library("parallel") +} + +# Duplicate stats +dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads) +write.table(dm, file=paste(output_prefix, "_dupMatrix.txt", sep=""), quote=F, row.name=F, sep="\t") + +# 2D density scatter plot +pdf(paste0(output_prefix, "_duprateExpDens.pdf")) +duprateExpDensPlot(DupMat=dm) +title("Density scatter plot") +mtext(output_prefix, side=3) +dev.off() +fit <- duprateExpFit(DupMat=dm) +cat( + paste("- dupRadar Int (duprate at low read counts):", fit$intercept), + paste("- dupRadar Sl (progression of the duplication rate):", fit$slope), + fill=TRUE, labels=output_prefix, + file=paste0(output_prefix, "_intercept_slope.txt"), append=FALSE +) + +# Create a multiqc file dupInt +sample_name <- gsub("Aligned.sortedByCoord.out.markDups", "", output_prefix) +line="#id: DupInt +#plot_type: 'generalstats' +#pconfig: +# dupRadar_intercept: +# title: 'dupInt' +# namespace: 'DupRadar' +# description: 'Intercept value from DupRadar' +# max: 100 +# min: 0 +# scale: 'RdYlGn-rev' +# format: '{:.2f}%' +Sample dupRadar_intercept" + +write(line,file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) +write(paste(sample_name, fit$intercept),file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) + +# Get numbers from dupRadar GLM +curve_x <- sort(log10(dm$RPK)) +curve_y = 100*predict(fit$glm, data.frame(x=curve_x), type="response") +# Remove all of the infinite values +infs = which(curve_x %in% c(-Inf,Inf)) +curve_x = curve_x[-infs] +curve_y = curve_y[-infs] +# Reduce number of data points +curve_x <- curve_x[seq(1, length(curve_x), 10)] +curve_y <- curve_y[seq(1, length(curve_y), 10)] +# Convert x values back to real counts +curve_x = 10^curve_x +# Write to file +line="#id: dupradar +#section_name: 'DupRadar' +#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html' +#description: \"provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication. +# This plot shows the general linear models - a summary of the gene duplication distributions. \" +#pconfig: +# title: 'DupRadar General Linear Model' +# xLog: True +# xlab: 'expression (reads/kbp)' +# ylab: '% duplicate reads' +# ymax: 100 +# ymin: 0 +# tt_label: '{point.x:.1f} reads/kbp: {point.y:,.2f}% duplicates' +# xPlotLines: +# - color: 'green' +# dashStyle: 'LongDash' +# label: +# style: {color: 'green'} +# text: '0.5 RPKM' +# verticalAlign: 'bottom' +# y: -65 +# value: 0.5 +# width: 1 +# - color: 'red' +# dashStyle: 'LongDash' +# label: +# style: {color: 'red'} +# text: '1 read/bp' +# verticalAlign: 'bottom' +# y: -65 +# value: 1000 +# width: 1" + +write(line,file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),append=TRUE) +write.table( + cbind(curve_x, curve_y), + file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"), + quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE, +) + +# Distribution of expression box plot +pdf(paste0(output_prefix, "_duprateExpBoxplot.pdf")) +duprateExpBoxplot(DupMat=dm) +title("Percent Duplication by Expression") +mtext(output_prefix, side=3) +dev.off() + +# Distribution of RPK values per gene +pdf(paste0(output_prefix, "_expressionHist.pdf")) +expressionHist(DupMat=dm) +title("Distribution of RPK values per gene") +mtext(output_prefix, side=3) +dev.off() + +# Print sessioninfo to standard out +print(output_prefix) +citation("dupRadar") +sessionInfo() diff --git a/target/nextflow/dupradar/main.nf b/target/nextflow/dupradar/main.nf new file mode 100644 index 0000000..242bfb5 --- /dev/null +++ b/target/nextflow/dupradar/main.nf @@ -0,0 +1,3992 @@ +// dupradar v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "dupradar", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "Sample ID", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input", + "description" : "path to input alignment file in BAM format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf_annotation", + "description" : "path to GTF annotation file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--paired", + "description" : "add flag if input alignment file consists of paired reads", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "strandedness of input bam file reads (forward, reverse or unstranded (default, applicable to paired reads))", + "required" : false, + "choices" : [ + "forward", + "reverse", + "unstranded" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_dupmatrix", + "description" : "path to output file (txt) of duplicate tag counts", + "default" : [ + "$id.dup_matrix.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_dup_intercept_mqc", + "description" : "path to output file (txt) of multiqc intercept value DupRadar", + "default" : [ + "$id.dup_intercept_mqc.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_duprate_exp_boxplot", + "description" : "path to output file (pdf) of distribution of expression box plot", + "default" : [ + "$id.duprate_exp_boxplot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_duprate_exp_densplot", + "description" : "path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "default" : [ + "$id.duprate_exp_densityplot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_duprate_exp_denscurve_mqc", + "description" : "path to output file (pdf) of density curve of gene duplication multiqc", + "default" : [ + "$id.duprate_exp_density_curve_mqc.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_expression_histogram", + "description" : "path to output file (pdf) of distribution of RPK values per gene histogram", + "default" : [ + "$id.expression_hist.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_intercept_slope", + "description" : "output file (txt) with progression of duplication rate value", + "default" : [ + "$id.intercept_slope.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "dupradar.r" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Assessment of duplication rates in RNA-Seq datasets\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/genes.gtf" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/dupradar.nf" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "r-base" + ], + "interactive" : false + }, + { + "type" : "r", + "bioc" : [ + "dupRadar" + ], + "bioc_force_install" : false, + "warnings_as_errors" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/dupradar/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/dupradar", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF_ANNOTATION+x} ]; then echo "${VIASH_PAR_GTF_ANNOTATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf_annotation='&'#" ; else echo "# par_gtf_annotation="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPMATRIX+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPMATRIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dupmatrix='&'#" ; else echo "# par_output_dupmatrix="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC+x} ]; then echo "${VIASH_PAR_OUTPUT_DUP_INTERCEPT_MQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dup_intercept_mqc='&'#" ; else echo "# par_output_dup_intercept_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPRATE_EXP_BOXPLOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_duprate_exp_boxplot='&'#" ; else echo "# par_output_duprate_exp_boxplot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSPLOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_duprate_exp_densplot='&'#" ; else echo "# par_output_duprate_exp_densplot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_duprate_exp_denscurve_mqc='&'#" ; else echo "# par_output_duprate_exp_denscurve_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM+x} ]; then echo "${VIASH_PAR_OUTPUT_EXPRESSION_HISTOGRAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_expression_histogram='&'#" ; else echo "# par_output_expression_histogram="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_INTERCEPT_SLOPE+x} ]; then echo "${VIASH_PAR_OUTPUT_INTERCEPT_SLOPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_intercept_slope='&'#" ; else echo "# par_output_intercept_slope="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -exo pipefail + +function num_strandness { + if [ \\$par_strandedness == 'unstranded' ]; then echo 0 + elif [ \\$par_strandedness == 'forward' ]; then echo 1 + elif [ \\$par_strandedness == 'reverse' ]; then echo 2 + else echo "strandedness must be unstranded, forward or reverse." && \\\\ + exit 1 + fi +} + +Rscript "\\$meta_resources_dir/dupradar.r" \\\\ + \\$par_input \\\\ + \\$par_id \\\\ + \\$par_gtf_annotation \\\\ + \\$(num_strandness) \\\\ + \\$par_paired \\\\ + \\${meta_cpus:-1} + +mv "\\$par_id"_dupMatrix.txt \\$par_output_dupmatrix +mv "\\$par_id"_dup_intercept_mqc.txt \\$par_output_dup_intercept_mqc +mv "\\$par_id"_duprateExpBoxplot.pdf \\$par_output_duprate_exp_boxplot +mv "\\$par_id"_duprateExpDens.pdf \\$par_output_duprate_exp_densplot +mv "\\$par_id"_duprateExpDensCurve_mqc.txt \\$par_output_duprate_exp_denscurve_mqc +mv "\\$par_id"_expressionHist.pdf \\$par_output_expression_histogram +mv "\\$par_id"_intercept_slope.txt \\$par_output_intercept_slope +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/dupradar", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/dupradar/nextflow.config b/target/nextflow/dupradar/nextflow.config new file mode 100644 index 0000000..aa02b68 --- /dev/null +++ b/target/nextflow/dupradar/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'dupradar' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Assessment of duplication rates in RNA-Seq datasets\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/dupradar/nextflow_labels.config b/target/nextflow/dupradar/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/dupradar/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/dupradar/nextflow_schema.json b/target/nextflow/dupradar/nextflow_schema.json new file mode 100644 index 0000000..afa1331 --- /dev/null +++ b/target/nextflow/dupradar/nextflow_schema.json @@ -0,0 +1,203 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "dupradar", +"description": "Assessment of duplication rates in RNA-Seq datasets\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`. Sample ID", + "help_text": "Type: `string`. Sample ID" + + } + + + , + "input": { + "type": + "string", + "description": "Type: `file`, required. path to input alignment file in BAM format", + "help_text": "Type: `file`, required. path to input alignment file in BAM format" + + } + + + , + "gtf_annotation": { + "type": + "string", + "description": "Type: `file`, required. path to GTF annotation file", + "help_text": "Type: `file`, required. path to GTF annotation file." + + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean`. add flag if input alignment file consists of paired reads", + "help_text": "Type: `boolean`. add flag if input alignment file consists of paired reads" + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. strandedness of input bam file reads (forward, reverse or unstranded (default, applicable to paired reads))", + "help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. strandedness of input bam file reads (forward, reverse or unstranded (default, applicable to paired reads))", + "enum": ["forward", "reverse", "unstranded"] + + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_dupmatrix": { + "type": + "string", + "description": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts" + , + "default":"$id.dup_matrix.txt" + } + + + , + "output_dup_intercept_mqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", + "help_text": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" + , + "default":"$id.dup_intercept_mqc.txt" + } + + + , + "output_duprate_exp_boxplot": { + "type": + "string", + "description": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", + "help_text": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot" + , + "default":"$id.duprate_exp_boxplot.pdf" + } + + + , + "output_duprate_exp_densplot": { + "type": + "string", + "description": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" + , + "default":"$id.duprate_exp_densityplot.pdf" + } + + + , + "output_duprate_exp_denscurve_mqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.txt`. path to output file (pdf) of density curve of gene duplication multiqc", + "help_text": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.txt`. path to output file (pdf) of density curve of gene duplication multiqc" + , + "default":"$id.duprate_exp_density_curve_mqc.txt" + } + + + , + "output_expression_histogram": { + "type": + "string", + "description": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", + "help_text": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" + , + "default":"$id.expression_hist.pdf" + } + + + , + "output_intercept_slope": { + "type": + "string", + "description": "Type: `file`, default: `$id.intercept_slope.txt`. output file (txt) with progression of duplication rate value", + "help_text": "Type: `file`, default: `$id.intercept_slope.txt`. output file (txt) with progression of duplication rate value" + , + "default":"$id.intercept_slope.txt" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/getchromsizes/.config.vsh.yaml b/target/nextflow/getchromsizes/.config.vsh.yaml new file mode 100644 index 0000000..3fd3412 --- /dev/null +++ b/target/nextflow/getchromsizes/.config.vsh.yaml @@ -0,0 +1,207 @@ +name: "getchromsizes" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Genome fasta files" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--sizes" + description: "File containing chromosome lengths" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "FASTA index file" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gzi" + description: "Optional gzip index file for compressed inputs" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Generates a FASTA file of chromosome sizes and a fasta index file.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genome.fasta" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/custom/getchromsizes/main.nf" + - "modules/nf-core/custom/getchromsizes/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y autoconf automake make gcc perl zlib1g-dev\ + \ libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev curl\ + \ bzip2 && \\\ncurl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2\ + \ -o samtools-1.18.tar.bz2 && \\\ntar -xjf samtools-1.18.tar.bz2 && \\\nrm samtools-1.18.tar.bz2\ + \ && \\\ncd samtools-1.18 && \\\n./configure && \\\nmake && \\\nmake install\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/getchromsizes/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/getchromsizes" + executable: "target/nextflow/getchromsizes/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/getchromsizes/main.nf b/target/nextflow/getchromsizes/main.nf new file mode 100644 index 0000000..24a5f45 --- /dev/null +++ b/target/nextflow/getchromsizes/main.nf @@ -0,0 +1,3841 @@ +// getchromsizes v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "getchromsizes", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--fasta", + "description" : "Genome fasta files", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--sizes", + "description" : "File containing chromosome lengths", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fai", + "description" : "FASTA index file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gzi", + "description" : "Optional gzip index file for compressed inputs", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Generates a FASTA file of chromosome sizes and a fasta index file.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/genome.fasta" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/custom/getchromsizes/main.nf", + "modules/nf-core/custom/getchromsizes/meta.yml" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev curl bzip2 && \\\\\ncurl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 -o samtools-1.18.tar.bz2 && \\\\\ntar -xjf samtools-1.18.tar.bz2 && \\\\\nrm samtools-1.18.tar.bz2 && \\\\\ncd samtools-1.18 && \\\\\n./configure && \\\\\nmake && \\\\\nmake install\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/getchromsizes/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/getchromsizes", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_SIZES+x} ]; then echo "${VIASH_PAR_SIZES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sizes='&'#" ; else echo "# par_sizes="; fi ) +$( if [ ! -z ${VIASH_PAR_FAI+x} ]; then echo "${VIASH_PAR_FAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fai='&'#" ; else echo "# par_fai="; fi ) +$( if [ ! -z ${VIASH_PAR_GZI+x} ]; then echo "${VIASH_PAR_GZI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gzi='&'#" ; else echo "# par_gzi="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +filename="\\$(basename -- \\$par_fasta)" + +samtools faidx \\$par_fasta +cut -f 1,2 "\\$par_fasta.fai" > \\$par_sizes +mv "\\$par_fasta.fai" \\$par_fai +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/getchromsizes", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/getchromsizes/nextflow.config b/target/nextflow/getchromsizes/nextflow.config new file mode 100644 index 0000000..5fe1936 --- /dev/null +++ b/target/nextflow/getchromsizes/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'getchromsizes' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Generates a FASTA file of chromosome sizes and a fasta index file.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/getchromsizes/nextflow_labels.config b/target/nextflow/getchromsizes/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/getchromsizes/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/getchromsizes/nextflow_schema.json b/target/nextflow/getchromsizes/nextflow_schema.json new file mode 100644 index 0000000..c59b3c1 --- /dev/null +++ b/target/nextflow/getchromsizes/nextflow_schema.json @@ -0,0 +1,117 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "getchromsizes", +"description": "Generates a FASTA file of chromosome sizes and a fasta index file.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "fasta": { + "type": + "string", + "description": "Type: `file`. Genome fasta files", + "help_text": "Type: `file`. Genome fasta files" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "sizes": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.sizes`. File containing chromosome lengths", + "help_text": "Type: `file`, default: `$id.$key.sizes`. File containing chromosome lengths" + , + "default":"$id.$key.sizes" + } + + + , + "fai": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.fai`. FASTA index file", + "help_text": "Type: `file`, default: `$id.$key.fai`. FASTA index file" + , + "default":"$id.$key.fai" + } + + + , + "gzi": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.gzi`. Optional gzip index file for compressed inputs", + "help_text": "Type: `file`, default: `$id.$key.gzi`. Optional gzip index file for compressed inputs" + , + "default":"$id.$key.gzi" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/gtf2bed/.config.vsh.yaml b/target/nextflow/gtf2bed/.config.vsh.yaml new file mode 100644 index 0000000..8049024 --- /dev/null +++ b/target/nextflow/gtf2bed/.config.vsh.yaml @@ -0,0 +1,185 @@ +name: "gtf2bed" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--gtf" + description: "A reference file in GTF format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: " Output" + arguments: + - type: "file" + name: "--bed_output" + description: "BED file resulting from the conversion of the GTF input file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "gtf2bed.pl" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Create BED annotation file from GTF.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genes.gtf.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/gtf2bed.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "perl" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/gtf2bed/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/gtf2bed" + executable: "target/nextflow/gtf2bed/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/gtf2bed/gtf2bed.pl b/target/nextflow/gtf2bed/gtf2bed.pl new file mode 100755 index 0000000..4da5c04 --- /dev/null +++ b/target/nextflow/gtf2bed/gtf2bed.pl @@ -0,0 +1,122 @@ +#!/usr/bin/env perl + +# Copyright (c) 2011 Erik Aronesty (erik@q32.com) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT. + +use Getopt::Long; + +my $extended; +GetOptions("x"=>\$extended); + +$in = shift @ARGV; + +my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? "unzip -p $in|" : "$in") || die "Can't open $in: $!\n"; +open IN, $in_cmd; + +while () { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { + # most ver 2's stick gene names in the id field + ($id) = $f[8]=~ /\bID="([^"]+)"/; + # most ver 3's stick unquoted names in the name field + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } + + next unless $id && $f[0]; + + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! $f[3]; + # gff3 puts :\d in exons sometimes + $id =~ s/:\d+$// if $gff == 3; + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } +} + +for $id ( + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? + $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + my ($cds, $cde); + ($cds, $cde) = @{$sc{$id}} if $sc{$id}; + + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + + my $gene_id; + my $extend = ""; + if ($extended) { + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + $extend="\t$gene_id"; + } + # added an extra comma to make it look exactly like ucsc's beds + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; +} + +close IN; diff --git a/target/nextflow/gtf2bed/main.nf b/target/nextflow/gtf2bed/main.nf new file mode 100644 index 0000000..a0fc538 --- /dev/null +++ b/target/nextflow/gtf2bed/main.nf @@ -0,0 +1,3817 @@ +// gtf2bed v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "gtf2bed", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--gtf", + "description" : "A reference file in GTF format.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : " Output", + "arguments" : [ + { + "type" : "file", + "name" : "--bed_output", + "description" : "BED file resulting from the conversion of the GTF input file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "gtf2bed.pl" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Create BED annotation file from GTF.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/genes.gtf.gz" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/gtf2bed.nf" + ], + "last_sha" : "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "perl" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/gtf2bed/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/gtf2bed", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_BED_OUTPUT+x} ]; then echo "${VIASH_PAR_BED_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_output='&'#" ; else echo "# par_bed_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +perl "\\$meta_resources_dir/gtf2bed.pl" \\$par_gtf > \\$par_bed_output +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/gtf2bed", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/gtf2bed/nextflow.config b/target/nextflow/gtf2bed/nextflow.config new file mode 100644 index 0000000..8bc6f44 --- /dev/null +++ b/target/nextflow/gtf2bed/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'gtf2bed' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Create BED annotation file from GTF.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/gtf2bed/nextflow_labels.config b/target/nextflow/gtf2bed/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/gtf2bed/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/gtf2bed/nextflow_schema.json b/target/nextflow/gtf2bed/nextflow_schema.json new file mode 100644 index 0000000..d459f39 --- /dev/null +++ b/target/nextflow/gtf2bed/nextflow_schema.json @@ -0,0 +1,95 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "gtf2bed", +"description": "Create BED annotation file from GTF.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "gtf": { + "type": + "string", + "description": "Type: `file`, required. A reference file in GTF format", + "help_text": "Type: `file`, required. A reference file in GTF format." + + } + + +} +}, + + + " output" : { + "title": " Output", + "type": "object", + "description": "No description", + "properties": { + + + "bed_output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.bed_output`. BED file resulting from the conversion of the GTF input file", + "help_text": "Type: `file`, required, default: `$id.$key.bed_output`. BED file resulting from the conversion of the GTF input file." + , + "default":"$id.$key.bed_output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/ output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/gtf_filter/.config.vsh.yaml b/target/nextflow/gtf_filter/.config.vsh.yaml new file mode 100644 index 0000000..0f658e7 --- /dev/null +++ b/target/nextflow/gtf_filter/.config.vsh.yaml @@ -0,0 +1,195 @@ +name: "gtf_filter" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Genome fasta file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_transcript_id_check" + description: "Skip checking for transcript IDs in the GTF file." + info: null + direction: "input" +- name: " Output" + arguments: + - type: "file" + name: "--filtered_gtf" + description: "Filtered GTF file containing only sequences in the FASTA file" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Filters a GTF file based on sequence names in a FASTA file.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genome.fasta" +- type: "file" + path: "genes.gtf.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/gtf_filter.nf" + last_sha: "1c6012ecbb087014ea4b8f0f3d39b874850277a8" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/gtf_filter/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/gtf_filter" + executable: "target/nextflow/gtf_filter/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/gtf_filter/main.nf b/target/nextflow/gtf_filter/main.nf new file mode 100644 index 0000000..0469186 --- /dev/null +++ b/target/nextflow/gtf_filter/main.nf @@ -0,0 +1,3876 @@ +// gtf_filter v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "gtf_filter", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--fasta", + "description" : "Genome fasta file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "GTF file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--skip_transcript_id_check", + "description" : "Skip checking for transcript IDs in the GTF file.", + "direction" : "input" + } + ] + }, + { + "name" : " Output", + "arguments" : [ + { + "type" : "file", + "name" : "--filtered_gtf", + "description" : "Filtered GTF file containing only sequences in the FASTA file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Filters a GTF file based on sequence names in a FASTA file.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/genome.fasta" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/genes.gtf.gz" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/gtf_filter.nf" + ], + "last_sha" : "1c6012ecbb087014ea4b8f0f3d39b874850277a8" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/gtf_filter/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/gtf_filter", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.py" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'fasta': $( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "r'${VIASH_PAR_FASTA//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'gtf': $( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "r'${VIASH_PAR_GTF//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'skip_transcript_id_check': $( if [ ! -z ${VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK+x} ]; then echo "r'${VIASH_PAR_SKIP_TRANSCRIPT_ID_CHECK//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'filtered_gtf': $( if [ ! -z ${VIASH_PAR_FILTERED_GTF+x} ]; then echo "r'${VIASH_PAR_FILTERED_GTF//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END +# Adapted from https://github.com/nf-core/rnaseq/blob/3.14.0/bin/filter_gtf.py + +import os +import sys +import re +import statistics +from typing import Set + +def extract_fasta_seq_names(fasta_name: str) -> Set[str]: + """Extracts the sequence names from a FASTA file.""" + with open(fasta_name) as fasta: + return {line[1:].split(None, 1)[0] for line in fasta if line.startswith(">")} + +def tab_delimited(file: str) -> float: + """Check if file is tab-delimited and return median number of tabs.""" + with open(file, "r") as f: + data = f.read(102400) + return statistics.median(line.count("\\\\t") for line in data.split("\\\\n")) + +def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool) -> None: + """Filter GTF file based on FASTA sequence names.""" + if tab_delimited(gtf_in) != 8: + raise ValueError("Invalid GTF file: Expected 9 tab-separated columns.") + seq_names_in_genome = extract_fasta_seq_names(fasta) + print(f"Extracted chromosome sequence names from {fasta}") + print("All sequence IDs from FASTA: " + ", ".join(sorted(seq_names_in_genome))) + seq_names_in_gtf = set() + try: + with open(gtf_in) as gtf, open(filtered_gtf_out, "w") as out: + line_count = 0 + for line in gtf: + seq_name = line.split("\\\\t")[0] + seq_names_in_gtf.add(seq_name) # Add sequence name to the set + if seq_name in seq_names_in_genome: + if skip_transcript_id_check or re.search(r'transcript_id "([^"]+)"', line): + out.write(line) + line_count += 1 + if line_count == 0: + raise ValueError("All GTF lines removed by filters") + except IOError as e: + print(f"File operation failed: {e}") + return + + print("All sequence IDs from GTF: " + ", ".join(sorted(seq_names_in_gtf))) + print(f"Extracted {line_count} matching sequences from {gtf_in} into {filtered_gtf_out}") + +filter_gtf(par["fasta"], par["gtf"], par["filtered_gtf"], par["skip_transcript_id_check"]) +VIASHMAIN +python -B "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/gtf_filter", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/gtf_filter/nextflow.config b/target/nextflow/gtf_filter/nextflow.config new file mode 100644 index 0000000..39d8595 --- /dev/null +++ b/target/nextflow/gtf_filter/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'gtf_filter' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Filters a GTF file based on sequence names in a FASTA file.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/gtf_filter/nextflow_labels.config b/target/nextflow/gtf_filter/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/gtf_filter/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/gtf_filter/nextflow_schema.json b/target/nextflow/gtf_filter/nextflow_schema.json new file mode 100644 index 0000000..89fc1ac --- /dev/null +++ b/target/nextflow/gtf_filter/nextflow_schema.json @@ -0,0 +1,116 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "gtf_filter", +"description": "Filters a GTF file based on sequence names in a FASTA file.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "fasta": { + "type": + "string", + "description": "Type: `file`. Genome fasta file", + "help_text": "Type: `file`. Genome fasta file" + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. GTF file", + "help_text": "Type: `file`. GTF file" + + } + + + , + "skip_transcript_id_check": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip checking for transcript IDs in the GTF file", + "help_text": "Type: `boolean_true`, default: `false`. Skip checking for transcript IDs in the GTF file." + , + "default":false + } + + +} +}, + + + " output" : { + "title": " Output", + "type": "object", + "description": "No description", + "properties": { + + + "filtered_gtf": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.filtered_gtf`. Filtered GTF file containing only sequences in the FASTA file", + "help_text": "Type: `file`, default: `$id.$key.filtered_gtf`. Filtered GTF file containing only sequences in the FASTA file" + , + "default":"$id.$key.filtered_gtf" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/ output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/gunzip/.config.vsh.yaml b/target/nextflow/gunzip/.config.vsh.yaml new file mode 100644 index 0000000..2941e47 --- /dev/null +++ b/target/nextflow/gunzip/.config.vsh.yaml @@ -0,0 +1,184 @@ +name: "gunzip" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Path of file to be uncompressed" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Decompressed file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compress or uncompress a file or list of files.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genes.gff.gz" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/gunzip/main.nf" + - "modules/nf-core/gunzip/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "gzip" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/gunzip/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/gunzip" + executable: "target/nextflow/gunzip/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/gunzip/main.nf b/target/nextflow/gunzip/main.nf new file mode 100644 index 0000000..356093b --- /dev/null +++ b/target/nextflow/gunzip/main.nf @@ -0,0 +1,3820 @@ +// gunzip v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "gunzip", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Path of file to be uncompressed", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Decompressed file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Compress or uncompress a file or list of files.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/genes.gff.gz" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/gunzip/main.nf", + "modules/nf-core/gunzip/meta.yml" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "gzip" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/gunzip/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/gunzip", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +filename="\\$(basename -- "\\$par_input")" + +if [ \\${filename##*.} == "gz" ]; then + gunzip -c \\$par_input > \\$par_output +else + cat \\$par_input > \\$par_output +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/gunzip", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/gunzip/nextflow.config b/target/nextflow/gunzip/nextflow.config new file mode 100644 index 0000000..f656770 --- /dev/null +++ b/target/nextflow/gunzip/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'gunzip' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Compress or uncompress a file or list of files.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/gunzip/nextflow_labels.config b/target/nextflow/gunzip/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/gunzip/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/gunzip/nextflow_schema.json b/target/nextflow/gunzip/nextflow_schema.json new file mode 100644 index 0000000..798f4f3 --- /dev/null +++ b/target/nextflow/gunzip/nextflow_schema.json @@ -0,0 +1,95 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "gunzip", +"description": "Compress or uncompress a file or list of files.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Path of file to be uncompressed", + "help_text": "Type: `file`, required. Path of file to be uncompressed" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output`. Decompressed file", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Decompressed file." + , + "default":"$id.$key.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml b/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml new file mode 100644 index 0000000..16f265b --- /dev/null +++ b/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml @@ -0,0 +1,205 @@ +name: "multiqc_custom_biotype" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--biocounts" + description: "File with all biocounts" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--id" + description: "Sample name" + info: null + default: + - "$id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--features" + description: "Features to count" + info: null + default: + - "rRNA" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--featurecounts_multiqc" + info: null + default: + - "$id.biotype_counts_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + default: + - "$id.biotype_counts_rrna_mqc.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "biotypes_header.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculate features percentage for biotype counts" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:latest" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/multiqc_custom_biotype/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/multiqc_custom_biotype" + executable: "target/nextflow/multiqc_custom_biotype/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/multiqc_custom_biotype/biotypes_header.txt b/target/nextflow/multiqc_custom_biotype/biotypes_header.txt new file mode 100644 index 0000000..dff52c0 --- /dev/null +++ b/target/nextflow/multiqc_custom_biotype/biotypes_header.txt @@ -0,0 +1,11 @@ +# id: 'biotype_counts' +# section_name: 'Biotype Counts' +# description: "shows reads overlapping genomic features of different biotypes, +# counted by featureCounts." +# plot_type: 'bargraph' +# anchor: 'featurecounts_biotype' +# pconfig: +# id: "featurecounts_biotype_plot" +# title: "featureCounts: Biotypes" +# xlab: "# Reads" +# cpswitch_counts_label: "Number of Reads" diff --git a/target/nextflow/multiqc_custom_biotype/main.nf b/target/nextflow/multiqc_custom_biotype/main.nf new file mode 100644 index 0000000..e9312f3 --- /dev/null +++ b/target/nextflow/multiqc_custom_biotype/main.nf @@ -0,0 +1,3930 @@ +// multiqc_custom_biotype v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "multiqc_custom_biotype", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--biocounts", + "description" : "File with all biocounts", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--id", + "description" : "Sample name", + "default" : [ + "$id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--features", + "description" : "Features to count", + "default" : [ + "rRNA" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--featurecounts_multiqc", + "default" : [ + "$id.biotype_counts_mqc.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts_rrna_multiqc", + "default" : [ + "$id.biotype_counts_rrna_mqc.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true + }, + { + "type" : "file", + "path" : "biotypes_header.txt" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Calculate features percentage for biotype counts", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:latest", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/multiqc_custom_biotype/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/multiqc_custom_biotype", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.py" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'biocounts': $( if [ ! -z ${VIASH_PAR_BIOCOUNTS+x} ]; then echo "r'${VIASH_PAR_BIOCOUNTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'id': $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "r'${VIASH_PAR_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'features': $( if [ ! -z ${VIASH_PAR_FEATURES+x} ]; then echo "r'${VIASH_PAR_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'featurecounts_multiqc': $( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_MULTIQC+x} ]; then echo "r'${VIASH_PAR_FEATURECOUNTS_MULTIQC//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'featurecounts_rrna_multiqc': $( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC+x} ]; then echo "r'${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END +#!/usr/bin/env python3 + +import argparse +import logging +import os + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + +mqc_main = """#id: 'biotype-gs' +#plot_type: 'generalstats' +#pconfig:""" + +mqc_pconf = """# percent_{ft}: +# title: '% {ft}' +# namespace: 'Biotype Counts' +# description: '% reads overlapping {ft} features' +# max: 100 +# min: 0 +# scale: 'RdYlGn-rev' +# format: '{{:.2f}}%'""" + + +def mqc_feature_stat(bfile, features, outfile, sname=None): + # If sample name not given use file name + if not sname: + sname = os.path.splitext(os.path.basename(bfile))[0] + + # Try to parse and read biocount file + fcounts = {} + try: + with open(bfile, "r") as bfl: + for ln in bfl: + if ln.startswith("#"): + continue + ft, cn = ln.strip().split("\\\\t") + fcounts[ft] = float(cn) + except: + logger.error("Trouble reading the biocount file {}".format(bfile)) + return + + total_count = sum(fcounts.values()) + if total_count == 0: + logger.error("No biocounts found, exiting") + return + + # Calculate percentage for each requested feature + fpercent = {f: (fcounts[f] / total_count) * 100 if f in fcounts else 0 for f in features} + if len(fpercent) == 0: + logger.error("Any of given features '{}' not found in the biocount file".format(", ".join(features), bfile)) + return + + # Prepare the output strings + out_head, out_value, out_mqc = ("Sample", "'{}'".format(sname), mqc_main) + for ft, pt in fpercent.items(): + out_head = "{}\\\\tpercent_{}".format(out_head, ft) + out_value = "{}\\\\t{}".format(out_value, pt) + out_mqc = "{}\\\\n{}".format(out_mqc, mqc_pconf.format(ft=ft)) + + # Write the output to a file + with open(outfile, "w") as ofl: + out_final = "\\\\n".join([out_mqc, out_head, out_value]).strip() + ofl.write(out_final + "\\\\n") + + +if __name__ == "__main__": + + # Read the biotypes_header.txt file + biotypes_header_path = os.path.join(meta["resources_dir"], 'biotypes_header.txt') + with open(biotypes_header_path, 'r') as header_file: + biotypes_header = header_file.read() + + # Extract specific columns (1 and 7) and skip the first two lines + filtered_lines = [] + with open(par["biocounts"], 'r') as biocounts_file: + for i, line in enumerate(biocounts_file): + if i >= 2: # Skipping first two lines + columns = line.strip().split('\\\\t') + filtered_line = f"{columns[0]}\\\\t{columns[6]}" # Columns 1 and 7 (0-indexed) + filtered_lines.append(filtered_line) + + # Concatenate the header and the processed lines + result = biotypes_header + '\\\\n'.join(filtered_lines) + '\\\\n' + + # Write the result to par_featurecounts_multiqc + with open(par["featurecounts_multiqc"], 'w') as output_file: + output_file.write(result) + + mqc_feature_stat(par["featurecounts_multiqc"], par["features"], par["featurecounts_rrna_multiqc"], par["id"]) +VIASHMAIN +python -B "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/multiqc_custom_biotype", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/multiqc_custom_biotype/nextflow.config b/target/nextflow/multiqc_custom_biotype/nextflow.config new file mode 100644 index 0000000..5939cfa --- /dev/null +++ b/target/nextflow/multiqc_custom_biotype/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'multiqc_custom_biotype' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Calculate features percentage for biotype counts' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/multiqc_custom_biotype/nextflow_labels.config b/target/nextflow/multiqc_custom_biotype/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/multiqc_custom_biotype/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/multiqc_custom_biotype/nextflow_schema.json b/target/nextflow/multiqc_custom_biotype/nextflow_schema.json new file mode 100644 index 0000000..8645a76 --- /dev/null +++ b/target/nextflow/multiqc_custom_biotype/nextflow_schema.json @@ -0,0 +1,128 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "multiqc_custom_biotype", +"description": "Calculate features percentage for biotype counts", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "biocounts": { + "type": + "string", + "description": "Type: `file`. File with all biocounts", + "help_text": "Type: `file`. File with all biocounts" + + } + + + , + "id": { + "type": + "string", + "description": "Type: `string`, default: `$id`. Sample name", + "help_text": "Type: `string`, default: `$id`. Sample name" + , + "default":"$id" + } + + + , + "features": { + "type": + "string", + "description": "Type: `string`, default: `rRNA`. Features to count", + "help_text": "Type: `string`, default: `rRNA`. Features to count" + , + "default":"rRNA" + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "featurecounts_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.biotype_counts_mqc.tsv`. ", + "help_text": "Type: `file`, default: `$id.biotype_counts_mqc.tsv`. " + , + "default":"$id.biotype_counts_mqc.tsv" + } + + + , + "featurecounts_rrna_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.biotype_counts_rrna_mqc.tsv`. ", + "help_text": "Type: `file`, default: `$id.biotype_counts_rrna_mqc.tsv`. " + , + "default":"$id.biotype_counts_rrna_mqc.tsv" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/picard_markduplicates/.config.vsh.yaml b/target/nextflow/picard_markduplicates/.config.vsh.yaml new file mode 100644 index 0000000..0529c5a --- /dev/null +++ b/target/nextflow/picard_markduplicates/.config.vsh.yaml @@ -0,0 +1,247 @@ +name: "picard_markduplicates" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--bam" + description: "Input BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fasta" + description: "Reference genome FASTA file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "Reference genome FASTA index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_picard_args" + description: "Additional argument to be passed to Picard MarkDuplicates" + info: null + default: + - "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT\ + \ --TMP_DIR tmp" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_bam" + description: "BAM file with duplicate reads marked/removed" + info: null + default: + - "$id.MarkDuplicates.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai" + description: "An optional BAM index file. If desired, --CREATE_INDEX must be passed\ + \ as a flag" + info: null + default: + - "$id.MarkDuplicates.bam.bai" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--metrics" + description: "Duplicate metrics file generated by picard" + info: null + default: + - "$id.MarkDuplicates.metrics.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Locate and tag duplicate reads in a BAM file\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "genome.fasta" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/picard/markduplicates/main.nf" + - "modules/nf-core/picard/markduplicates/meta.yml" + last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\ + \ && \\\nwget --no-check-certificate https://github.com/broadinstitute/picard/releases/download/3.1.1/picard.jar\ + \ && \\\nmv picard.jar /usr/local/bin \n" + env: + - "PICARD=/usr/local/bin/picard.jar" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/picard_markduplicates/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/picard_markduplicates" + executable: "target/nextflow/picard_markduplicates/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/picard_markduplicates/main.nf b/target/nextflow/picard_markduplicates/main.nf new file mode 100644 index 0000000..ca449a8 --- /dev/null +++ b/target/nextflow/picard_markduplicates/main.nf @@ -0,0 +1,3902 @@ +// picard_markduplicates v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "picard_markduplicates", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--bam", + "description" : "Input BAM file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fasta", + "description" : "Reference genome FASTA file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fai", + "description" : "Reference genome FASTA index", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_picard_args", + "description" : "Additional argument to be passed to Picard MarkDuplicates", + "default" : [ + "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_bam", + "description" : "BAM file with duplicate reads marked/removed", + "default" : [ + "$id.MarkDuplicates.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bai", + "description" : "An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag", + "default" : [ + "$id.MarkDuplicates.bam.bai" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--metrics", + "description" : "Duplicate metrics file generated by picard", + "default" : [ + "$id.MarkDuplicates.metrics.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Locate and tag duplicate reads in a BAM file\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/genome.fasta" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/picard/markduplicates/main.nf", + "modules/nf-core/picard/markduplicates/meta.yml" + ], + "last_sha" : "55398de6ab7577acfe9b1180016a93d7af7eb859" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y build-essential openjdk-17-jdk wget && \\\\\nwget --no-check-certificate https://github.com/broadinstitute/picard/releases/download/3.1.1/picard.jar && \\\\\nmv picard.jar /usr/local/bin \n" + ], + "env" : [ + "PICARD=/usr/local/bin/picard.jar" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/picard_markduplicates/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/picard_markduplicates", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_FAI+x} ]; then echo "${VIASH_PAR_FAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fai='&'#" ; else echo "# par_fai="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_PICARD_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_PICARD_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_picard_args='&'#" ; else echo "# par_extra_picard_args="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "${VIASH_PAR_OUTPUT_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_bam='&'#" ; else echo "# par_output_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_METRICS+x} ]; then echo "${VIASH_PAR_METRICS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_metrics='&'#" ; else echo "# par_metrics="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +avail_mem=3072 +if [ ! \\$meta_memory_mb ]; then + echo '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' +else + avail_mem=\\$(( \\$meta_memory_mb*0.8 )) +fi + +java -Xmx\\${avail_mem}M -jar \\$PICARD MarkDuplicates \\\\ + \\$par_extra_picard_args \\\\ + --INPUT \\$par_bam \\\\ + --OUTPUT \\$par_output_bam \\\\ + --REFERENCE_SEQUENCE \\$par_fasta \\\\ + --METRICS_FILE \\$par_metrics +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/picard_markduplicates", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/picard_markduplicates/nextflow.config b/target/nextflow/picard_markduplicates/nextflow.config new file mode 100644 index 0000000..db3e3e1 --- /dev/null +++ b/target/nextflow/picard_markduplicates/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'picard_markduplicates' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Locate and tag duplicate reads in a BAM file\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/picard_markduplicates/nextflow_labels.config b/target/nextflow/picard_markduplicates/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/picard_markduplicates/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/picard_markduplicates/nextflow_schema.json b/target/nextflow/picard_markduplicates/nextflow_schema.json new file mode 100644 index 0000000..3e64f4b --- /dev/null +++ b/target/nextflow/picard_markduplicates/nextflow_schema.json @@ -0,0 +1,148 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "picard_markduplicates", +"description": "Locate and tag duplicate reads in a BAM file\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "bam": { + "type": + "string", + "description": "Type: `file`. Input BAM file", + "help_text": "Type: `file`. Input BAM file" + + } + + + , + "fasta": { + "type": + "string", + "description": "Type: `file`. Reference genome FASTA file", + "help_text": "Type: `file`. Reference genome FASTA file" + + } + + + , + "fai": { + "type": + "string", + "description": "Type: `file`. Reference genome FASTA index", + "help_text": "Type: `file`. Reference genome FASTA index" + + } + + + , + "extra_picard_args": { + "type": + "string", + "description": "Type: `string`, default: `--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp`. Additional argument to be passed to Picard MarkDuplicates", + "help_text": "Type: `string`, default: `--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp`. Additional argument to be passed to Picard MarkDuplicates" + , + "default":"--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_bam": { + "type": + "string", + "description": "Type: `file`, default: `$id.MarkDuplicates.bam`. BAM file with duplicate reads marked/removed", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.bam`. BAM file with duplicate reads marked/removed" + , + "default":"$id.MarkDuplicates.bam" + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`, default: `$id.MarkDuplicates.bam.bai`. An optional BAM index file", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.bam.bai`. An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag" + , + "default":"$id.MarkDuplicates.bam.bai" + } + + + , + "metrics": { + "type": + "string", + "description": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. Duplicate metrics file generated by picard", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. Duplicate metrics file generated by picard" + , + "default":"$id.MarkDuplicates.metrics.txt" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/prepare_multiqc_input/.config.vsh.yaml b/target/nextflow/prepare_multiqc_input/.config.vsh.yaml new file mode 100644 index 0000000..1347f50 --- /dev/null +++ b/target/nextflow/prepare_multiqc_input/.config.vsh.yaml @@ -0,0 +1,452 @@ +name: "prepare_multiqc_input" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--fail_trimming_multiqc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--fail_mapping_multiqc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--fail_strand_multiqc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_raw_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--fastqc_trim_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--trim_log_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--sortmerna_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--star_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--salmon_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--samtools_stats" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--samtools_flagstat" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--samtools_idxstats" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--markduplicates_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--pseudo_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--featurecounts_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--aligner_pca_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--aligner_clustering_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_aligner_pca_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_aligner_clustering_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--preseq_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--qualimap_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--dupradar_output_dup_intercept_mqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--dupradar_output_duprate_exp_denscurve_mqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--bamstat_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--inferexperiment_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--innerdistance_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--junctionannotation_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--junctionsaturation_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--readdistribution_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--readduplication_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--tin_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--multiqc_config" + description: "Custom multiqc configuration file\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "multiqc_input" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "multiqc_config.yml" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Prepare directory with all the input files for MultiQC.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/prepare_multiqc_input/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/prepare_multiqc_input" + executable: "target/nextflow/prepare_multiqc_input/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/prepare_multiqc_input/main.nf b/target/nextflow/prepare_multiqc_input/main.nf new file mode 100644 index 0000000..d37576f --- /dev/null +++ b/target/nextflow/prepare_multiqc_input/main.nf @@ -0,0 +1,4220 @@ +// prepare_multiqc_input v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "prepare_multiqc_input", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--fail_trimming_multiqc", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--fail_mapping_multiqc", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--fail_strand_multiqc", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_raw_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--fastqc_trim_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--trim_log_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--sortmerna_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--star_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--salmon_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--samtools_stats", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--samtools_flagstat", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--samtools_idxstats", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--markduplicates_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--pseudo_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--featurecounts_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--featurecounts_rrna_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--aligner_pca_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--aligner_clustering_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_aligner_pca_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_aligner_clustering_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--preseq_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--qualimap_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--dupradar_output_dup_intercept_mqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--dupradar_output_duprate_exp_denscurve_mqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--bamstat_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--inferexperiment_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--innerdistance_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--junctionannotation_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--junctionsaturation_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--readdistribution_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--readduplication_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--tin_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--multiqc_config", + "description" : "Custom multiqc configuration file\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Ouput", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "default" : [ + "multiqc_input" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/assets/multiqc_config.yml" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Prepare directory with all the input files for MultiQC.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/prepare_multiqc_input/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/prepare_multiqc_input", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_FAIL_TRIMMING_MULTIQC+x} ]; then echo "${VIASH_PAR_FAIL_TRIMMING_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fail_trimming_multiqc='&'#" ; else echo "# par_fail_trimming_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FAIL_MAPPING_MULTIQC+x} ]; then echo "${VIASH_PAR_FAIL_MAPPING_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fail_mapping_multiqc='&'#" ; else echo "# par_fail_mapping_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FAIL_STRAND_MULTIQC+x} ]; then echo "${VIASH_PAR_FAIL_STRAND_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fail_strand_multiqc='&'#" ; else echo "# par_fail_strand_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_RAW_MULTIQC+x} ]; then echo "${VIASH_PAR_FASTQC_RAW_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_raw_multiqc='&'#" ; else echo "# par_fastqc_raw_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_TRIM_MULTIQC+x} ]; then echo "${VIASH_PAR_FASTQC_TRIM_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_trim_multiqc='&'#" ; else echo "# par_fastqc_trim_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_LOG_MULTIQC+x} ]; then echo "${VIASH_PAR_TRIM_LOG_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_log_multiqc='&'#" ; else echo "# par_trim_log_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_SORTMERNA_MULTIQC+x} ]; then echo "${VIASH_PAR_SORTMERNA_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sortmerna_multiqc='&'#" ; else echo "# par_sortmerna_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_MULTIQC+x} ]; then echo "${VIASH_PAR_STAR_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_multiqc='&'#" ; else echo "# par_star_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_SALMON_MULTIQC+x} ]; then echo "${VIASH_PAR_SALMON_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_salmon_multiqc='&'#" ; else echo "# par_salmon_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMTOOLS_STATS+x} ]; then echo "${VIASH_PAR_SAMTOOLS_STATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_samtools_stats='&'#" ; else echo "# par_samtools_stats="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMTOOLS_FLAGSTAT+x} ]; then echo "${VIASH_PAR_SAMTOOLS_FLAGSTAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_samtools_flagstat='&'#" ; else echo "# par_samtools_flagstat="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMTOOLS_IDXSTATS+x} ]; then echo "${VIASH_PAR_SAMTOOLS_IDXSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_samtools_idxstats='&'#" ; else echo "# par_samtools_idxstats="; fi ) +$( if [ ! -z ${VIASH_PAR_MARKDUPLICATES_MULTIQC+x} ]; then echo "${VIASH_PAR_MARKDUPLICATES_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_markduplicates_multiqc='&'#" ; else echo "# par_markduplicates_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PSEUDO_MULTIQC+x} ]; then echo "${VIASH_PAR_PSEUDO_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pseudo_multiqc='&'#" ; else echo "# par_pseudo_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_MULTIQC+x} ]; then echo "${VIASH_PAR_FEATURECOUNTS_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_featurecounts_multiqc='&'#" ; else echo "# par_featurecounts_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC+x} ]; then echo "${VIASH_PAR_FEATURECOUNTS_RRNA_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_featurecounts_rrna_multiqc='&'#" ; else echo "# par_featurecounts_rrna_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_ALIGNER_PCA_MULTIQC+x} ]; then echo "${VIASH_PAR_ALIGNER_PCA_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aligner_pca_multiqc='&'#" ; else echo "# par_aligner_pca_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC+x} ]; then echo "${VIASH_PAR_ALIGNER_CLUSTERING_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aligner_clustering_multiqc='&'#" ; else echo "# par_aligner_clustering_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_PCA_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pseudo_aligner_pca_multiqc='&'#" ; else echo "# par_pseudo_aligner_pca_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_CLUSTERING_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pseudo_aligner_clustering_multiqc='&'#" ; else echo "# par_pseudo_aligner_clustering_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_PRESEQ_MULTIQC+x} ]; then echo "${VIASH_PAR_PRESEQ_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_preseq_multiqc='&'#" ; else echo "# par_preseq_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALIMAP_MULTIQC+x} ]; then echo "${VIASH_PAR_QUALIMAP_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qualimap_multiqc='&'#" ; else echo "# par_qualimap_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC+x} ]; then echo "${VIASH_PAR_DUPRADAR_OUTPUT_DUP_INTERCEPT_MQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dupradar_output_dup_intercept_mqc='&'#" ; else echo "# par_dupradar_output_dup_intercept_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC+x} ]; then echo "${VIASH_PAR_DUPRADAR_OUTPUT_DUPRATE_EXP_DENSCURVE_MQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dupradar_output_duprate_exp_denscurve_mqc='&'#" ; else echo "# par_dupradar_output_duprate_exp_denscurve_mqc="; fi ) +$( if [ ! -z ${VIASH_PAR_BAMSTAT_MULTIQC+x} ]; then echo "${VIASH_PAR_BAMSTAT_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bamstat_multiqc='&'#" ; else echo "# par_bamstat_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_INFEREXPERIMENT_MULTIQC+x} ]; then echo "${VIASH_PAR_INFEREXPERIMENT_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_inferexperiment_multiqc='&'#" ; else echo "# par_inferexperiment_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_INNERDISTANCE_MULTIQC+x} ]; then echo "${VIASH_PAR_INNERDISTANCE_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_innerdistance_multiqc='&'#" ; else echo "# par_innerdistance_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_JUNCTIONANNOTATION_MULTIQC+x} ]; then echo "${VIASH_PAR_JUNCTIONANNOTATION_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_junctionannotation_multiqc='&'#" ; else echo "# par_junctionannotation_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_JUNCTIONSATURATION_MULTIQC+x} ]; then echo "${VIASH_PAR_JUNCTIONSATURATION_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_junctionsaturation_multiqc='&'#" ; else echo "# par_junctionsaturation_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_READDISTRIBUTION_MULTIQC+x} ]; then echo "${VIASH_PAR_READDISTRIBUTION_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_readdistribution_multiqc='&'#" ; else echo "# par_readdistribution_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_READDUPLICATION_MULTIQC+x} ]; then echo "${VIASH_PAR_READDUPLICATION_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_readduplication_multiqc='&'#" ; else echo "# par_readduplication_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_TIN_MULTIQC+x} ]; then echo "${VIASH_PAR_TIN_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tin_multiqc='&'#" ; else echo "# par_tin_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_MULTIQC_CONFIG+x} ]; then echo "${VIASH_PAR_MULTIQC_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_multiqc_config='&'#" ; else echo "# par_multiqc_config="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \\$par_output + +echo \\$par_fail_trimming_multiqc > \\$par_output/fail_trimming_mqc.tsv +echo \\$par_fail_mapping_multiqc > \\$par_output/fail_mapping_mqc.tsv +echo \\$par_fail_strand_multiqc > \\$par_output/fail_strand_mqc.tsv + +IFS="," read -ra fastqc_raw_multiqc <<< \\$par_fastqc_raw_multiqc && for file in "\\${fastqc_raw_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra fastqc_trim_multiqc <<< \\$par_fastqc_trim_multiqc && for file in "\\${fastqc_trim_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra trim_log_multiqc <<< \\$par_trim_log_multiqc && for file in "\\${trim_log_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra sortmerna_multiqc <<< \\$par_sortmerna_multiqc && for file in "\\${sortmerna_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra star_multiqc <<< \\$par_star_multiqc && for file in "\\${star_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +# IFS="," read -ra hisat2_multiqc <<< \\$par_hisat2_multiqc && for file in "\\${hisat2_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra rsem_multiqc <<< \\$par_rsem_multiqc && for file in "\\${rsem_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra salmon_multiqc <<< \\$par_salmon_multiqc && for file in "\\${salmon_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra pseudo_multiqc <<< \\$par_pseudo_multiqc && for file in "\\${pseudo_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra samtools_stats <<< \\$par_samtools_stats && for file in "\\${samtools_stats[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" \\$par_output/; done + +IFS="," read -ra samtools_flagstat <<< \\$par_samtools_flagstat && for file in "\\${samtools_flagstat[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" \\$par_output/; done + +IFS="," read -ra samtools_idxstats <<< \\$par_samtools_idxstats && for file in "\\${samtools_idxstats[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra markduplicates_multiqc <<< \\$par_markduplicates_multiqc && for file in "\\${markduplicates_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra featurecounts_multiqc <<< \\$par_featurecounts_multiqc && for file in "\\${featurecounts_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra featurecounts_rrna_multiqc <<< \\$par_featurecounts_rrna_multiqc && for file in "\\${featurecounts_rrna_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +[ -e "\\$par_aligner_pca_multiqc" ] && cp -r "\\$par_aligner_pca_multiqc" "\\$par_output/" + +[ -e "\\$par_aligner_clustering_multiqc" ] && cp -r \\$par_aligner_clustering_multiqc "\\$par_output/" + +[ -e "\\$par_pseudo_aligner_pca_multiqc" ] && cp -r \\$par_pseudo_aligner_pca_multiqc "\\$par_output/" + +[ -e "\\$par_pseudo_aligner_clustering_multiqc" ] && cp -r \\$par_pseudo_aligner_clustering_multiqc "\\$par_output/" + +IFS="," read -ra preseq_multiqc <<< \\$par_preseq_multiqc && for file in "\\${preseq_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra qualimap_multiqc <<< \\$par_qualimap_multiqc && for file in "\\${qualimap_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra dupradar_output_dup_intercept_mqc <<< \\$par_dupradar_output_dup_intercept_mqc && for file in "\\${dupradar_output_dup_intercept_mqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra dupradar_output_duprate_exp_denscurve_mqc <<< \\$par_dupradar_output_duprate_exp_denscurve_mqc && for file in "\\${dupradar_output_duprate_exp_denscurve_mqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra bamstat_multiqc <<< \\$par_bamstat_multiqc && for file in "\\${bamstat_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra inferexperiment_multiqc <<< \\$par_inferexperiment_multiqc && for file in "\\${inferexperiment_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra innerdistance_multiqc <<< \\$par_innerdistance_multiqc && for file in "\\${innerdistance_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra junctionannotation_multiqc <<< \\$par_junctionannotation_multiqc && for file in "\\${junctionannotation_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra junctionsaturation_multiqc <<< \\$par_junctionsaturation_multiqc && for file in "\\${junctionsaturation_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra readdistribution_multiqc <<< \\$par_readdistribution_multiqc && for file in "\\${readdistribution_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra readduplication_multiqc <<< \\$par_readduplication_multiqc && for file in "\\${readduplication_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +IFS="," read -ra tin_multiqc <<< \\$par_tin_multiqc && for file in "\\${tin_multiqc[@]}"; do [ -e "\\$file" ] && cp -r "\\$file" "\\$par_output/"; done + +echo "Checking for custom multiqc_config" +# If the variable is empty, we use the default one (registered as a resource) +if [ -z \\$par_multiqc_config ]; then + echo "No multiqc_config provided, using the default" + cp \\$meta_resources_dir/multiqc_config.yml "\\$par_output" +else + echo "Optional file provided" + if [ -f \\$par_multiqc_config ]; then + cp \\$par_multiqc_config "\\$par_output"/multiqc_config.yml + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/prepare_multiqc_input", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/prepare_multiqc_input/multiqc_config.yml b/target/nextflow/prepare_multiqc_input/multiqc_config.yml new file mode 100644 index 0000000..0ac2fb1 --- /dev/null +++ b/target/nextflow/prepare_multiqc_input/multiqc_config.yml @@ -0,0 +1,164 @@ +report_comment: > + This report has been generated by the + analysis pipeline. +report_section_order: + "rnaseq-methods-description": + order: -1000 + software_versions: + order: -1001 + "rnaseq.vsh-summary": + order: -1002 + +export_plots: true +disable_version_detection: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - cutadapt + - fastp + - sortmerna + - star + - rsem + - salmon + - kallisto + - samtools + - picard + - preseq + - rseqc + - qualimap + +# Order of modules +top_modules: + - "fail_trimming" + - "fail_mapping" + - "fail_strand" + - "star_rsem_deseq2_pca" + - "star_rsem_deseq2_clustering" + - "star_salmon_deseq2_pca" + - "star_salmon_deseq2_clustering" + - "salmon_deseq2_pca" + - "salmon_deseq2_clustering" + - "kallisto_deseq2_pca" + - "kallisto_deseq2_clustering" + - "biotype_counts" + - "dupradar" + +module_order: + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." + path_filters: + - "*.read_*.fastqc.zip" + - cutadapt + - fastp + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." + path_filters: + - "*.trimgalore.read_*.fastqc.zip" + +# Don't show % Dups in the General Stats table (we have this from Picard) +table_columns_visible: + fastqc: + percent_duplicates: False + +extra_fn_clean_extn: + # - ".mapping_quality" + # - ".MarkDuplicates_flagstat.output.flagstat" + # - ".MarkDuplicates_idxstats.output.idxstats" + # - ".MarkDuplicates_stats.output.txt" + # - ".genome_sorted_MarkDuplicates.output.bam" + # - ".genome_sorted_MarkDuplicates" + - ".read_1" + - ".read_2" + +# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml +custom_data: + fail_trimming: + section_name: "WARNING: Fail Trimming Check" + description: "List of samples that failed the minimum trimmed reads threshold specified via the '--min_trimmed_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_trimmed_samples_table" + table_title: "Samples failed trimming threshold" + namespace: "Samples failed trimming threshold" + format: "{:.0f}" + fail_mapping: + section_name: "WARNING: Fail Alignment Check" + description: "List of samples that failed the STAR minimum mapped reads threshold specified via the '--min_mapped_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_mapped_samples_table" + table_title: "Samples failed mapping threshold" + namespace: "Samples failed mapping threshold" + format: "{:.2f}" + fail_strand: + section_name: "WARNING: Fail Strand Check" + description: "List of samples that failed the strandedness check between that provided in the samplesheet and calculated by the RSeQC infer_experiment.py tool." + plot_type: "table" + pconfig: + id: "fail_strand_check_table" + table_title: "Samples failed strandedness check" + namespace: "Samples failed strandedness check" + format: "{:.2f}" + +# Customise the module search patterns to speed up execution time +# - Skip module sub-tools that we are not interested in +# - Replace file-content searching with filename pattern searching +# - Don't add anything that is the same as the MultiQC default +# See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + + fastqc/zip: + fn: "*.fastqc.zip" + + cutadapt: + fn: "*.trimming_report*.txt" + + fastp: + fn: "*.fastp_out.json" + + sortmerna: + fn: "*sortmerna*.log" + + star: + fn: "*.star_align_reads.log.txt" + + # hisat2: + # fn: "*.hisat2.summary.log" + # salmon: + # fn: "*meta_info.json" + + preseq: + fn: "*.lc_extrap.txt" + + samtools/stats: + fn: "*_stats.output.txt" + samtools/flagstat: + fn: "*_flagstat.output.flagstat" + samtools/idxstats: + fn: "*_idxstats.output.idxstats" + + rseqc/bam_stat: + fn: "*.mapping_quality.txt" + rseqc/junction_saturation: + fn: "*.junction_saturation_plot.r" + rseqc/junction_annotation: + fn: "*.junction_annotation.log" + rseqc/read_duplication_pos: + fn: "*.duplication_rate_mapping.xls" + rseqc/read_distribution: + fn: "*.read_distribution.txt" + rseqc/infer_experiment: + fn: "*.strandedness.txt" + rseqc/inner_distance: + fn: "*.inner_distance_freq.txt" + rseqc/tin: + fn: "*.tin_summary.txt" + + picard/markdups: + fn: "*.MarkDuplicates.metrics.txt" + +skip_versions_section: true diff --git a/target/nextflow/prepare_multiqc_input/nextflow.config b/target/nextflow/prepare_multiqc_input/nextflow.config new file mode 100644 index 0000000..174439a --- /dev/null +++ b/target/nextflow/prepare_multiqc_input/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'prepare_multiqc_input' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Prepare directory with all the input files for MultiQC.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/prepare_multiqc_input/nextflow_labels.config b/target/nextflow/prepare_multiqc_input/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/prepare_multiqc_input/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/prepare_multiqc_input/nextflow_schema.json b/target/nextflow/prepare_multiqc_input/nextflow_schema.json new file mode 100644 index 0000000..9f56067 --- /dev/null +++ b/target/nextflow/prepare_multiqc_input/nextflow_schema.json @@ -0,0 +1,415 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "prepare_multiqc_input", +"description": "Prepare directory with all the input files for MultiQC.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "fail_trimming_multiqc": { + "type": + "string", + "description": "Type: `string`. ", + "help_text": "Type: `string`. " + + } + + + , + "fail_mapping_multiqc": { + "type": + "string", + "description": "Type: `string`. ", + "help_text": "Type: `string`. " + + } + + + , + "fail_strand_multiqc": { + "type": + "string", + "description": "Type: `string`. ", + "help_text": "Type: `string`. " + + } + + + , + "fastqc_raw_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "fastqc_trim_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "trim_log_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "sortmerna_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "star_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "salmon_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "samtools_stats": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "samtools_flagstat": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "samtools_idxstats": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "markduplicates_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "pseudo_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "featurecounts_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "featurecounts_rrna_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "aligner_pca_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "aligner_clustering_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "pseudo_aligner_pca_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "pseudo_aligner_clustering_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "preseq_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "qualimap_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "dupradar_output_dup_intercept_mqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "dupradar_output_duprate_exp_denscurve_mqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "bamstat_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "inferexperiment_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "innerdistance_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "junctionannotation_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "junctionsaturation_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "readdistribution_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "readduplication_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "tin_multiqc": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "multiqc_config": { + "type": + "string", + "description": "Type: `file`. Custom multiqc configuration file\n", + "help_text": "Type: `file`. Custom multiqc configuration file\n" + + } + + +} +}, + + + "ouput" : { + "title": "Ouput", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `multiqc_input`. ", + "help_text": "Type: `file`, default: `multiqc_input`. " + , + "default":"multiqc_input" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/ouput" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml b/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml new file mode 100644 index 0000000..50ee7f5 --- /dev/null +++ b/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml @@ -0,0 +1,178 @@ +name: "preprocess_transcripts_fasta" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--transcript_fasta" + description: "Path of transcripts FASTA file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Path of processed output FASTA file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Process transcripts FASTA if GTF file is GENOCODE format\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "transcriptome.fasta" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/preprocess_transcripts_fasta_gencode.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/preprocess_transcripts_fasta/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/preprocess_transcripts_fasta" + executable: "target/nextflow/preprocess_transcripts_fasta/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/preprocess_transcripts_fasta/main.nf b/target/nextflow/preprocess_transcripts_fasta/main.nf new file mode 100644 index 0000000..2ea1426 --- /dev/null +++ b/target/nextflow/preprocess_transcripts_fasta/main.nf @@ -0,0 +1,3810 @@ +// preprocess_transcripts_fasta v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "preprocess_transcripts_fasta", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--transcript_fasta", + "description" : "Path of transcripts FASTA file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Path of processed output FASTA file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Process transcripts FASTA if GTF file is GENOCODE format\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/transcriptome.fasta" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/preprocess_transcripts_fasta_gencode.nf" + ], + "last_sha" : "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/preprocess_transcripts_fasta/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/preprocess_transcripts_fasta", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_TRANSCRIPT_FASTA+x} ]; then echo "${VIASH_PAR_TRANSCRIPT_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcript_fasta='&'#" ; else echo "# par_transcript_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +filename="\\$(basename -- "\\$par_transcript_fasta")" + +if [ \\${filename##*.} == "gz" ]; then + zcat \\$par_transcript_fasta | cut -d "|" -f1 > \\$par_output +else + cat \\$par_transcript_fasta | cut -d "|" -f1 > \\$par_output +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/preprocess_transcripts_fasta", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/preprocess_transcripts_fasta/nextflow.config b/target/nextflow/preprocess_transcripts_fasta/nextflow.config new file mode 100644 index 0000000..3d6feff --- /dev/null +++ b/target/nextflow/preprocess_transcripts_fasta/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'preprocess_transcripts_fasta' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Process transcripts FASTA if GTF file is GENOCODE format\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/preprocess_transcripts_fasta/nextflow_labels.config b/target/nextflow/preprocess_transcripts_fasta/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/preprocess_transcripts_fasta/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json b/target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json new file mode 100644 index 0000000..9ff11b1 --- /dev/null +++ b/target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json @@ -0,0 +1,95 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "preprocess_transcripts_fasta", +"description": "Process transcripts FASTA if GTF file is GENOCODE format\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "transcript_fasta": { + "type": + "string", + "description": "Type: `file`, required. Path of transcripts FASTA file", + "help_text": "Type: `file`, required. Path of transcripts FASTA file" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output`. Path of processed output FASTA file", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Path of processed output FASTA file." + , + "default":"$id.$key.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/preseq_lcextrap/.config.vsh.yaml b/target/nextflow/preseq_lcextrap/.config.vsh.yaml new file mode 100644 index 0000000..8d9ca97 --- /dev/null +++ b/target/nextflow/preseq_lcextrap/.config.vsh.yaml @@ -0,0 +1,231 @@ +name: "preseq_lcextrap" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Input genome BAM/BED file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_preseq_args" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "Paired-end reads?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "$id.lc_extrap.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Computing the expected future yield of distinct reads and bounds on\ + \ the number of total distinct reads in the library and the associated confidence\ + \ intervals." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "a.sorted.bed" +- type: "file" + path: "SRR1106616_5M_subset.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/preseq/lcextrap/main.nf" + - "modules/nf-core/preseq/lcextrap/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "curl" + - "bzip2" + - "build-essential" + - "wget" + - "gcc" + - "autoconf" + - "automake" + - "make" + - "libz-dev" + - "libbz2-dev" + - "zlib1g-dev" + - "libncurses5-dev" + - "libncursesw5-dev" + - "liblzma-dev" + - "pip" + interactive: false + - type: "docker" + run: + - "cd /usr/bin && \\\nwget --no-check-certificate https://github.com/smithlabcode/preseq/releases/download/v3.2.0/preseq-3.2.0.tar.gz\ + \ && \\\nwget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2\ + \ && \\\nwget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static\ + \ && \\\ncurl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2\ + \ -o samtools-1.18.tar.bz2 && \\\ntar -xjf samtools-1.18.tar.bz2 && rm samtools-1.18.tar.bz2\ + \ && \\\ntar -xzf preseq-3.2.0.tar.gz && rm preseq-3.2.0.tar.gz && \\\ntar -vxjf\ + \ htslib-1.9.tar.bz2 && rm htslib-1.9.tar.bz2 && \\\nmv bedtools.static /usr/local/bin/bedtools\ + \ && \\\nchmod a+x /usr/local/bin/bedtools && \\\ncd samtools-1.18 && \\\n./configure\ + \ && \\\nmake && \\\nmake install && \\\ncd /usr/bin && cd htslib-1.9 && \\\n\ + make && \\\ncd /usr/bin && cd preseq-3.2.0 && \\\nmkdir build && cd build &&\ + \ \\\n../configure && \\\nmake && make install && make HAVE_HTSLIB=1 all \n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/preseq_lcextrap/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/preseq_lcextrap" + executable: "target/nextflow/preseq_lcextrap/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/preseq_lcextrap/main.nf b/target/nextflow/preseq_lcextrap/main.nf new file mode 100644 index 0000000..d7e3603 --- /dev/null +++ b/target/nextflow/preseq_lcextrap/main.nf @@ -0,0 +1,3883 @@ +// preseq_lcextrap v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "preseq_lcextrap", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input genome BAM/BED file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_preseq_args", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--paired", + "description" : "Paired-end reads?", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "default" : [ + "$id.lc_extrap.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Computing the expected future yield of distinct reads and bounds on the number of total distinct reads in the library and the associated confidence intervals.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/a.sorted.bed" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/SRR1106616_5M_subset.bam" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/preseq/lcextrap/main.nf", + "modules/nf-core/preseq/lcextrap/meta.yml" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "curl", + "bzip2", + "build-essential", + "wget", + "gcc", + "autoconf", + "automake", + "make", + "libz-dev", + "libbz2-dev", + "zlib1g-dev", + "libncurses5-dev", + "libncursesw5-dev", + "liblzma-dev", + "pip" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "cd /usr/bin && \\\\\nwget --no-check-certificate https://github.com/smithlabcode/preseq/releases/download/v3.2.0/preseq-3.2.0.tar.gz && \\\\\nwget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && \\\\\nwget --no-check-certificate https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \\\\\ncurl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 -o samtools-1.18.tar.bz2 && \\\\\ntar -xjf samtools-1.18.tar.bz2 && rm samtools-1.18.tar.bz2 && \\\\\ntar -xzf preseq-3.2.0.tar.gz && rm preseq-3.2.0.tar.gz && \\\\\ntar -vxjf htslib-1.9.tar.bz2 && rm htslib-1.9.tar.bz2 && \\\\\nmv bedtools.static /usr/local/bin/bedtools && \\\\\nchmod a+x /usr/local/bin/bedtools && \\\\\ncd samtools-1.18 && \\\\\n./configure && \\\\\nmake && \\\\\nmake install && \\\\\ncd /usr/bin && cd htslib-1.9 && \\\\\nmake && \\\\\ncd /usr/bin && cd preseq-3.2.0 && \\\\\nmkdir build && cd build && \\\\\n../configure && \\\\\nmake && make install && make HAVE_HTSLIB=1 all \n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/preseq_lcextrap/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/preseq_lcextrap", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_PRESEQ_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_PRESEQ_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_preseq_args='&'#" ; else echo "# par_extra_preseq_args="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +file=\\$(basename -- "\\$par_input") +filename="\\${file%.*}" + +if [ "\\${file##*.}" == "bam" ]; then + samtools sort -o sorted_\\$filename.bam -n \\$par_input + bedtools bamtobed -i sorted_\\$filename.bam > \\$filename.bed + bedtools sort -i \\$filename.bed > sorted_\\$filename.bed +elif [ "\\${file##*.}" == "bed" ]; then + bedtools sort -i \\$par_input > sorted_\\$filename.bed +else + echo "Invalid input file format!" + exit 1 +fi + +if \\$par_paired; then + paired="-pe" +else + paired="" +fi + +preseq lc_extrap \\\\ + sorted_\\$filename.bed \\\\ + \\$paired \\\\ + \\$par_extra_preseq_args \\\\ + -o \\$par_output +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/preseq_lcextrap", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/preseq_lcextrap/nextflow.config b/target/nextflow/preseq_lcextrap/nextflow.config new file mode 100644 index 0000000..8fdc397 --- /dev/null +++ b/target/nextflow/preseq_lcextrap/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'preseq_lcextrap' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Computing the expected future yield of distinct reads and bounds on the number of total distinct reads in the library and the associated confidence intervals.' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/preseq_lcextrap/nextflow_labels.config b/target/nextflow/preseq_lcextrap/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/preseq_lcextrap/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/preseq_lcextrap/nextflow_schema.json b/target/nextflow/preseq_lcextrap/nextflow_schema.json new file mode 100644 index 0000000..4947c16 --- /dev/null +++ b/target/nextflow/preseq_lcextrap/nextflow_schema.json @@ -0,0 +1,115 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "preseq_lcextrap", +"description": "Computing the expected future yield of distinct reads and bounds on the number of total distinct reads in the library and the associated confidence intervals.", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`. Input genome BAM/BED file", + "help_text": "Type: `file`. Input genome BAM/BED file" + + } + + + , + "extra_preseq_args": { + "type": + "string", + "description": "Type: `string`. ", + "help_text": "Type: `string`. " + + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean`. Paired-end reads?", + "help_text": "Type: `boolean`. Paired-end reads?" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.lc_extrap.txt`. ", + "help_text": "Type: `file`, default: `$id.lc_extrap.txt`. " + , + "default":"$id.lc_extrap.txt" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/rsem_merge_counts/.config.vsh.yaml b/target/nextflow/rsem_merge_counts/.config.vsh.yaml new file mode 100644 index 0000000..a89812f --- /dev/null +++ b/target/nextflow/rsem_merge_counts/.config.vsh.yaml @@ -0,0 +1,221 @@ +name: "rsem_merge_counts" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--counts_gene" + description: "Expression counts on gene level (genes)" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcripts" + description: "Expression counts on transcript level (isoforms)" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--merged_gene_counts" + description: "File containing gene counts across all samples." + info: null + default: + - "rsem.merged.gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_gene_tpm" + description: "File containing gene TPM across all samples." + info: null + default: + - "rsem.merged.gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_transcript_counts" + description: "File containing transcript counts across all samples." + info: null + default: + - "rsem.merged.transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_transcript_tpm" + description: "File containing transcript TPM across all samples." + info: null + default: + - "rsem.merged.transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Merge the transcript quantification results obtained from rsem calculate-expression\ + \ across all samples.\n" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/rsem_merge_counts/main.nf" + last_sha: "311279532694ce7520164ce4d65a388c0cd11f60" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rsem_merge_counts/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rsem_merge_counts" + executable: "target/nextflow/rsem_merge_counts/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/rsem_merge_counts/main.nf b/target/nextflow/rsem_merge_counts/main.nf new file mode 100644 index 0000000..71b4ed0 --- /dev/null +++ b/target/nextflow/rsem_merge_counts/main.nf @@ -0,0 +1,3876 @@ +// rsem_merge_counts v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rsem_merge_counts", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--counts_gene", + "description" : "Expression counts on gene level (genes)", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcripts", + "description" : "Expression counts on transcript level (isoforms)", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--merged_gene_counts", + "description" : "File containing gene counts across all samples.", + "default" : [ + "rsem.merged.gene_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--merged_gene_tpm", + "description" : "File containing gene TPM across all samples.", + "default" : [ + "rsem.merged.gene_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--merged_transcript_counts", + "description" : "File containing transcript counts across all samples.", + "default" : [ + "rsem.merged.transcript_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--merged_transcript_tpm", + "description" : "File containing transcript TPM across all samples.", + "default" : [ + "rsem.merged.transcript_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Merge the transcript quantification results obtained from rsem calculate-expression across all samples.\n", + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/rsem_merge_counts/main.nf" + ], + "last_sha" : "311279532694ce7520164ce4d65a388c0cd11f60" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rsem_merge_counts/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rsem_merge_counts", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_GENE_COUNTS+x} ]; then echo "${VIASH_PAR_MERGED_GENE_COUNTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merged_gene_counts='&'#" ; else echo "# par_merged_gene_counts="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_GENE_TPM+x} ]; then echo "${VIASH_PAR_MERGED_GENE_TPM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merged_gene_tpm='&'#" ; else echo "# par_merged_gene_tpm="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_TRANSCRIPT_COUNTS+x} ]; then echo "${VIASH_PAR_MERGED_TRANSCRIPT_COUNTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merged_transcript_counts='&'#" ; else echo "# par_merged_transcript_counts="; fi ) +$( if [ ! -z ${VIASH_PAR_MERGED_TRANSCRIPT_TPM+x} ]; then echo "${VIASH_PAR_MERGED_TRANSCRIPT_TPM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_merged_transcript_tpm='&'#" ; else echo "# par_merged_transcript_tpm="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -ep pipefail + +mkdir -p tmp/genes +# cut -f 1,2 \\`ls \\$par_count_genes/*\\` | head -n 1\\` > gene_ids.txt +for file_id in \\${par_count_genes[*]}; do + samplename=\\`basename \\$file_id | sed s/\\\\\\\\.genes.results\\\\\\$//g\\` + echo \\$samplename > tmp/genes/\\${samplename}.counts.txt + cut -f 5 \\${file_id} | tail -n+2 >> tmp/genes/\\${samplename}.counts.txt + echo \\$samplename > tmp/genes/\\${samplename}.tpm.txt + cut -f 6 \\${file_id} | tail -n+2 >> tmp/genes/\\${samplename}.tpm.txt +done + +mkdir -p tmp/isoforms +# cut -f 1,2 \\`ls \\$par_counts_transcripts/*\\` | head -n 1\\` > transcript_ids.txt +for file_id in \\${par_counts_transcripts[*]}; do + samplename=\\`basename \\$file_id | sed s/\\\\\\\\.isoforms.results\\\\\\$//g\\` + echo \\$samplename > tmp/isoforms/\\${samplename}.counts.txt + cut -f 5 \\${file_id} | tail -n+2 >> tmp/isoforms/\\${samplename}.counts.txt + echo \\$samplename > tmp/isoforms/\\${samplename}.tpm.txt + cut -f 6 \\${file_id} | tail -n+2 >> tmp/isoforms/\\${samplename}.tpm.txt +done + +paste gene_ids.txt tmp/genes/*.counts.txt > \\$par_merged_gene_counts +paste gene_ids.txt tmp/genes/*.tpm.txt > \\$par_merged_gene_tpm +paste transcript_ids.txt tmp/isoforms/*.counts.txt > \\$par_merged_transcript_counts +paste transcript_ids.txt tmp/isoforms/*.tpm.txt > \\$par_merged_transcript_tpm +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/rsem_merge_counts", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/rsem_merge_counts/nextflow.config b/target/nextflow/rsem_merge_counts/nextflow.config new file mode 100644 index 0000000..e026d35 --- /dev/null +++ b/target/nextflow/rsem_merge_counts/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'rsem_merge_counts' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Merge the transcript quantification results obtained from rsem calculate-expression across all samples.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/rsem_merge_counts/nextflow_labels.config b/target/nextflow/rsem_merge_counts/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/rsem_merge_counts/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/rsem_merge_counts/nextflow_schema.json b/target/nextflow/rsem_merge_counts/nextflow_schema.json new file mode 100644 index 0000000..e89573a --- /dev/null +++ b/target/nextflow/rsem_merge_counts/nextflow_schema.json @@ -0,0 +1,138 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rsem_merge_counts", +"description": "Merge the transcript quantification results obtained from rsem calculate-expression across all samples.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "counts_gene": { + "type": + "string", + "description": "Type: `file`. Expression counts on gene level (genes)", + "help_text": "Type: `file`. Expression counts on gene level (genes)" + + } + + + , + "counts_transcripts": { + "type": + "string", + "description": "Type: `file`. Expression counts on transcript level (isoforms)", + "help_text": "Type: `file`. Expression counts on transcript level (isoforms)" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "merged_gene_counts": { + "type": + "string", + "description": "Type: `file`, default: `rsem.merged.gene_counts.tsv`. File containing gene counts across all samples", + "help_text": "Type: `file`, default: `rsem.merged.gene_counts.tsv`. File containing gene counts across all samples." + , + "default":"rsem.merged.gene_counts.tsv" + } + + + , + "merged_gene_tpm": { + "type": + "string", + "description": "Type: `file`, default: `rsem.merged.gene_tpm.tsv`. File containing gene TPM across all samples", + "help_text": "Type: `file`, default: `rsem.merged.gene_tpm.tsv`. File containing gene TPM across all samples." + , + "default":"rsem.merged.gene_tpm.tsv" + } + + + , + "merged_transcript_counts": { + "type": + "string", + "description": "Type: `file`, default: `rsem.merged.transcript_counts.tsv`. File containing transcript counts across all samples", + "help_text": "Type: `file`, default: `rsem.merged.transcript_counts.tsv`. File containing transcript counts across all samples." + , + "default":"rsem.merged.transcript_counts.tsv" + } + + + , + "merged_transcript_tpm": { + "type": + "string", + "description": "Type: `file`, default: `rsem.merged.transcript_tpm.tsv`. File containing transcript TPM across all samples", + "help_text": "Type: `file`, default: `rsem.merged.transcript_tpm.tsv`. File containing transcript TPM across all samples." + , + "default":"rsem.merged.transcript_tpm.tsv" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml new file mode 100644 index 0000000..83bc494 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml @@ -0,0 +1,300 @@ +name: "rseqc_junctionannotation" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default=30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_intron" + description: "Minimum intron length (bp), default = 50." + info: null + default: + - 50 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_log" + description: "output log of junction annotation script" + info: null + default: + - "$id.junction_annotation.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plot_r" + description: "r script to generate splice_junction and splice_events plot" + info: null + default: + - "$id.junction_annotation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_junction_bed" + description: "junction annotation file (bed format)" + info: null + default: + - "$id.junction_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_junction_interact" + description: "interact file (bed format) of junctions. Can be uploaded to UCSC\ + \ genome browser or converted to bigInteract (using bedToBigBed program) for\ + \ visualization." + info: null + default: + - "$id.junction_annotation.Interact.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_junction_sheet" + description: "junction annotation file (xls format)" + info: null + default: + - "$id.junction_annotation.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_splice_events_plot" + description: "plot of splice events (pdf)" + info: null + default: + - "$id.splice_events.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_splice_junctions_plot" + description: "plot of junctions (pdf)" + info: null + default: + - "$id.splice_junctions_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compare detected splice junctions to reference gene model.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.bed12" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/junctionannotation/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + - "r-base" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_junctionannotation/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_junctionannotation" + executable: "target/nextflow/rseqc/rseqc_junctionannotation/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/main.nf b/target/nextflow/rseqc/rseqc_junctionannotation/main.nf new file mode 100644 index 0000000..5fcdbc1 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionannotation/main.nf @@ -0,0 +1,3974 @@ +// rseqc_junctionannotation v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_junctionannotation", + "namespace" : "rseqc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "input alignment file in BAM or SAM format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--refgene", + "description" : "Reference gene model in bed format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--map_qual", + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.", + "default" : [ + 30 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_intron", + "description" : "Minimum intron length (bp), default = 50.", + "default" : [ + 50 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_log", + "description" : "output log of junction annotation script", + "default" : [ + "$id.junction_annotation.log" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_plot_r", + "description" : "r script to generate splice_junction and splice_events plot", + "default" : [ + "$id.junction_annotation_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_junction_bed", + "description" : "junction annotation file (bed format)", + "default" : [ + "$id.junction_annotation.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_junction_interact", + "description" : "interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization.", + "default" : [ + "$id.junction_annotation.Interact.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_junction_sheet", + "description" : "junction annotation file (xls format)", + "default" : [ + "$id.junction_annotation.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_splice_events_plot", + "description" : "plot of splice events (pdf)", + "default" : [ + "$id.splice_events.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_splice_junctions_plot", + "description" : "plot of junctions (pdf)", + "default" : [ + "$id.splice_junctions_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Compare detected splice junctions to reference gene model.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.bed12" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/rseqc/junctionannotation/main.nf" + ] + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "python3-pip", + "r-base" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "RSeQC" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/rseqc_junctionannotation/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_junctionannotation", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_INTRON+x} ]; then echo "${VIASH_PAR_MIN_INTRON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_intron='&'#" ; else echo "# par_min_intron="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_LOG+x} ]; then echo "${VIASH_PAR_OUTPUT_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_log='&'#" ; else echo "# par_output_log="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT_R}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plot_r='&'#" ; else echo "# par_output_plot_r="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_JUNCTION_BED+x} ]; then echo "${VIASH_PAR_OUTPUT_JUNCTION_BED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_junction_bed='&'#" ; else echo "# par_output_junction_bed="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_JUNCTION_INTERACT+x} ]; then echo "${VIASH_PAR_OUTPUT_JUNCTION_INTERACT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_junction_interact='&'#" ; else echo "# par_output_junction_interact="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_JUNCTION_SHEET+x} ]; then echo "${VIASH_PAR_OUTPUT_JUNCTION_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_junction_sheet='&'#" ; else echo "# par_output_junction_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_SPLICE_EVENTS_PLOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_splice_events_plot='&'#" ; else echo "# par_output_splice_events_plot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_SPLICE_JUNCTIONS_PLOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_splice_junctions_plot='&'#" ; else echo "# par_output_splice_junctions_plot="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix=\\$(openssl rand -hex 8) +input="testData/unit_test_resources/test.paired_end.sorted.bam" +refgene="testData/unit_test_resources/test.bed" +junction_annotation.py \\\\ + -i \\$par_input \\\\ + -r \\$par_refgene \\\\ + -o \\$prefix \\\\ + -m \\$par_min_intron \\\\ + -q \\$par_map_qual > \\$par_output_log + +[[ -f "\\$prefix.junction.bed" ]] && mv \\$prefix.junction.bed \\$par_output_junction_bed +[[ -f "\\$prefix.junction.Interact.bed" ]] && mv \\$prefix.junction.Interact.bed \\$par_output_junction_interact +[[ -f "\\$prefix.junction.xls" ]] && mv \\$prefix.junction.xls \\$par_output_junction_sheet +[[ -f "\\$prefix.junction_plot.r" ]] && mv \\$prefix.junction_plot.r \\$par_output_plot_r +[[ -f "\\$prefix.splice_events.pdf" ]] && mv \\$prefix.splice_events.pdf \\$par_output_splice_events_plot +[[ -f "\\$prefix.splice_junction.pdf" ]] && mv \\$prefix.splice_junction.pdf \\$par_output_splice_junctions_plot +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/rseqc/rseqc_junctionannotation", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/nextflow.config b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow.config new file mode 100644 index 0000000..3e6ba9a --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'rseqc/rseqc_junctionannotation' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Compare detected splice junctions to reference gene model.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_labels.config b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json new file mode 100644 index 0000000..1f8d4ee --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json @@ -0,0 +1,193 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_junctionannotation", +"description": "Compare detected splice junctions to reference gene model.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. input alignment file in BAM or SAM format" + + } + + + , + "refgene": { + "type": + "string", + "description": "Type: `file`, required. Reference gene model in bed format", + "help_text": "Type: `file`, required. Reference gene model in bed format" + + } + + + , + "map_qual": { + "type": + "integer", + "description": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", + "help_text": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." + , + "default":30 + } + + + , + "min_intron": { + "type": + "integer", + "description": "Type: `integer`, default: `50`. Minimum intron length (bp), default = 50", + "help_text": "Type: `integer`, default: `50`. Minimum intron length (bp), default = 50." + , + "default":50 + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_log": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script", + "help_text": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script" + , + "default":"$id.junction_annotation.log" + } + + + , + "output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation_plot.r`. r script to generate splice_junction and splice_events plot", + "help_text": "Type: `file`, default: `$id.junction_annotation_plot.r`. r script to generate splice_junction and splice_events plot" + , + "default":"$id.junction_annotation_plot.r" + } + + + , + "output_junction_bed": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.bed`. junction annotation file (bed format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.bed`. junction annotation file (bed format)" + , + "default":"$id.junction_annotation.bed" + } + + + , + "output_junction_interact": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions", + "help_text": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." + , + "default":"$id.junction_annotation.Interact.bed" + } + + + , + "output_junction_sheet": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.xls`. junction annotation file (xls format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.xls`. junction annotation file (xls format)" + , + "default":"$id.junction_annotation.xls" + } + + + , + "output_splice_events_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.splice_events.pdf`. plot of splice events (pdf)", + "help_text": "Type: `file`, default: `$id.splice_events.pdf`. plot of splice events (pdf)" + , + "default":"$id.splice_events.pdf" + } + + + , + "output_splice_junctions_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)", + "help_text": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)" + , + "default":"$id.splice_junctions_plot.pdf" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml new file mode 100644 index 0000000..e6a5795 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml @@ -0,0 +1,289 @@ +name: "rseqc_junctionsaturation" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_lower_bound" + description: "Sampling starts from this percentile, must be an integer between\ + \ 0 and 100, default =5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_upper_bound" + description: "Sampling ends at this percentile, must be an integer between 0 and\ + \ 100, default =5." + info: null + default: + - 100 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_step" + description: "Sampling frequency in %. Smaller value means more sampling times.\ + \ Must be an integer between 0 and 100, default = 5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_intron" + description: "Minimum intron length (bp), default = 50." + info: null + default: + - 50 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_splice_read" + description: "Minimum number of supporting reads to call a junction, default =\ + \ 1." + info: null + default: + - 1 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default=30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_plot_r" + description: "r script to generate junction_saturation_plot plot" + info: null + default: + - "$id.junction_saturation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plot" + description: "plot of junction saturation (pdf)" + info: null + default: + - "$id.junction_saturation_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Compare detected splice junctions to reference gene model.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.bed" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/junctionsaturation/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + - "r-base" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_junctionsaturation/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_junctionsaturation" + executable: "target/nextflow/rseqc/rseqc_junctionsaturation/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf b/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf new file mode 100644 index 0000000..903971f --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf @@ -0,0 +1,3957 @@ +// rseqc_junctionsaturation v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_junctionsaturation", + "namespace" : "rseqc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "input alignment file in BAM or SAM format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--refgene", + "description" : "Reference gene model in bed format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sampling_percentile_lower_bound", + "description" : "Sampling starts from this percentile, must be an integer between 0 and 100, default =5.", + "default" : [ + 5 + ], + "required" : false, + "min" : 0, + "max" : 100, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sampling_percentile_upper_bound", + "description" : "Sampling ends at this percentile, must be an integer between 0 and 100, default =5.", + "default" : [ + 100 + ], + "required" : false, + "min" : 0, + "max" : 100, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sampling_percentile_step", + "description" : "Sampling frequency in %. Smaller value means more sampling times. Must be an integer between 0 and 100, default = 5.", + "default" : [ + 5 + ], + "required" : false, + "min" : 0, + "max" : 100, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_intron", + "description" : "Minimum intron length (bp), default = 50.", + "default" : [ + 50 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_splice_read", + "description" : "Minimum number of supporting reads to call a junction, default = 1.", + "default" : [ + 1 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--map_qual", + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.", + "default" : [ + 30 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_plot_r", + "description" : "r script to generate junction_saturation_plot plot", + "default" : [ + "$id.junction_saturation_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_plot", + "description" : "plot of junction saturation (pdf)", + "default" : [ + "$id.junction_saturation_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Compare detected splice junctions to reference gene model.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.bed" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/rseqc/junctionsaturation/main.nf" + ] + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "python3-pip", + "r-base" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "RSeQC" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/rseqc_junctionsaturation/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_junctionsaturation", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND+x} ]; then echo "${VIASH_PAR_SAMPLING_PERCENTILE_LOWER_BOUND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sampling_percentile_lower_bound='&'#" ; else echo "# par_sampling_percentile_lower_bound="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND+x} ]; then echo "${VIASH_PAR_SAMPLING_PERCENTILE_UPPER_BOUND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sampling_percentile_upper_bound='&'#" ; else echo "# par_sampling_percentile_upper_bound="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_PERCENTILE_STEP+x} ]; then echo "${VIASH_PAR_SAMPLING_PERCENTILE_STEP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sampling_percentile_step='&'#" ; else echo "# par_sampling_percentile_step="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_INTRON+x} ]; then echo "${VIASH_PAR_MIN_INTRON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_intron='&'#" ; else echo "# par_min_intron="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_SPLICE_READ+x} ]; then echo "${VIASH_PAR_MIN_SPLICE_READ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_splice_read='&'#" ; else echo "# par_min_splice_read="; fi ) +$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT_R}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plot_r='&'#" ; else echo "# par_output_plot_r="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plot='&'#" ; else echo "# par_output_plot="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix=\\$(openssl rand -hex 8) + +junction_saturation.py \\\\ + -i \\$par_input \\\\ + -r \\$par_refgene \\\\ + -o \\$prefix \\\\ + -l \\$par_sampling_percentile_lower_bound \\\\ + -u \\$par_sampling_percentile_upper_bound \\\\ + -s \\$par_sampling_percentile_step \\\\ + -m \\$par_min_intron \\\\ + -v \\$par_min_splice_read \\\\ + -q \\$par_map_qual + +[[ -f "\\$prefix.junctionSaturation_plot.pdf" ]] && mv \\$prefix.junctionSaturation_plot.pdf \\$par_output_plot +[[ -f "\\$prefix.junctionSaturation_plot.r" ]] && mv \\$prefix.junctionSaturation_plot.r \\$par_output_plot_r +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/rseqc/rseqc_junctionsaturation", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow.config b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow.config new file mode 100644 index 0000000..b8ad664 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'rseqc/rseqc_junctionsaturation' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Compare detected splice junctions to reference gene model.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_labels.config b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json new file mode 100644 index 0000000..ee7a48b --- /dev/null +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json @@ -0,0 +1,182 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_junctionsaturation", +"description": "Compare detected splice junctions to reference gene model.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. input alignment file in BAM or SAM format" + + } + + + , + "refgene": { + "type": + "string", + "description": "Type: `file`, required. Reference gene model in bed format", + "help_text": "Type: `file`, required. Reference gene model in bed format" + + } + + + , + "sampling_percentile_lower_bound": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Sampling starts from this percentile, must be an integer between 0 and 100, default =5", + "help_text": "Type: `integer`, default: `5`. Sampling starts from this percentile, must be an integer between 0 and 100, default =5." + , + "default":5 + } + + + , + "sampling_percentile_upper_bound": { + "type": + "integer", + "description": "Type: `integer`, default: `100`. Sampling ends at this percentile, must be an integer between 0 and 100, default =5", + "help_text": "Type: `integer`, default: `100`. Sampling ends at this percentile, must be an integer between 0 and 100, default =5." + , + "default":100 + } + + + , + "sampling_percentile_step": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Sampling frequency in %", + "help_text": "Type: `integer`, default: `5`. Sampling frequency in %. Smaller value means more sampling times. Must be an integer between 0 and 100, default = 5." + , + "default":5 + } + + + , + "min_intron": { + "type": + "integer", + "description": "Type: `integer`, default: `50`. Minimum intron length (bp), default = 50", + "help_text": "Type: `integer`, default: `50`. Minimum intron length (bp), default = 50." + , + "default":50 + } + + + , + "min_splice_read": { + "type": + "integer", + "description": "Type: `integer`, default: `1`. Minimum number of supporting reads to call a junction, default = 1", + "help_text": "Type: `integer`, default: `1`. Minimum number of supporting reads to call a junction, default = 1." + , + "default":1 + } + + + , + "map_qual": { + "type": + "integer", + "description": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", + "help_text": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." + , + "default":30 + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot" + , + "default":"$id.junction_saturation_plot.r" + } + + + , + "output_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf)", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf)" + , + "default":"$id.junction_saturation_plot.pdf" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml new file mode 100644 index 0000000..2b2c8aa --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml @@ -0,0 +1,202 @@ +name: "rseqc_readdistribution" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "Reference gene model in bed format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + description: "output file (txt) of read distribution analysis." + info: null + default: + - "$id.read_distribution.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculate how mapped reads are distributed over genomic features.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.bed12" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/readdistribution/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_readdistribution/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_readdistribution" + executable: "target/nextflow/rseqc/rseqc_readdistribution/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/rseqc/rseqc_readdistribution/main.nf b/target/nextflow/rseqc/rseqc_readdistribution/main.nf new file mode 100644 index 0000000..87ddf64 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readdistribution/main.nf @@ -0,0 +1,3843 @@ +// rseqc_readdistribution v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_readdistribution", + "namespace" : "rseqc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "input alignment file in BAM or SAM format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--refgene", + "description" : "Reference gene model in bed format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "output file (txt) of read distribution analysis.", + "default" : [ + "$id.read_distribution.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Calculate how mapped reads are distributed over genomic features.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.bed12" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/rseqc/readdistribution/main.nf" + ] + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "python3-pip" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "RSeQC" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/rseqc_readdistribution/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_readdistribution", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +read_distribution.py \\\\ + -i \\$par_input \\\\ + -r \\$par_refgene \\\\ +> \\$par_output +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/rseqc/rseqc_readdistribution", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/rseqc/rseqc_readdistribution/nextflow.config b/target/nextflow/rseqc/rseqc_readdistribution/nextflow.config new file mode 100644 index 0000000..67fac47 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readdistribution/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'rseqc/rseqc_readdistribution' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Calculate how mapped reads are distributed over genomic features.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/rseqc/rseqc_readdistribution/nextflow_labels.config b/target/nextflow/rseqc/rseqc_readdistribution/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readdistribution/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json b/target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json new file mode 100644 index 0000000..25670b5 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_readdistribution", +"description": "Calculate how mapped reads are distributed over genomic features.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. input alignment file in BAM or SAM format" + + } + + + , + "refgene": { + "type": + "string", + "description": "Type: `file`, required. Reference gene model in bed format", + "help_text": "Type: `file`, required. Reference gene model in bed format" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis", + "help_text": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis." + , + "default":"$id.read_distribution.txt" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml new file mode 100644 index 0000000..b3d22df --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml @@ -0,0 +1,251 @@ +name: "rseqc_readduplication" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "input alignment file in BAM or SAM format" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_count_upper_limit" + description: "Upper limit of reads' occurence. Only used for plotting, default\ + \ = 500 (times)." + info: null + default: + - 500 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default=30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_duplication_rate_plot_r" + description: "R script for generating duplication rate plot" + info: null + default: + - "$id.duplication_rate_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duplication_rate_plot" + description: "duplication rate plot (pdf)" + info: null + default: + - "$id.duplication_rate_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duplication_rate_mapping" + description: "Summary of mapping-based read duplication" + info: null + default: + - "$id.duplication_rate_mapping.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_duplication_rate_sequence" + description: "Summary of sequencing-based read duplication" + info: null + default: + - "$id.duplication_rate_sequencing.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculate read duplication rate.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/readduplication/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + - "r-base" + interactive: false + - type: "python" + user: false + packages: + - "RSeQC" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/rseqc_readduplication/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_readduplication" + executable: "target/nextflow/rseqc/rseqc_readduplication/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/rseqc/rseqc_readduplication/main.nf b/target/nextflow/rseqc/rseqc_readduplication/main.nf new file mode 100644 index 0000000..b2f6290 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readduplication/main.nf @@ -0,0 +1,3909 @@ +// rseqc_readduplication v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_readduplication", + "namespace" : "rseqc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "input alignment file in BAM or SAM format", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_count_upper_limit", + "description" : "Upper limit of reads' occurence. Only used for plotting, default = 500 (times).", + "default" : [ + 500 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--map_qual", + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.", + "default" : [ + 30 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_duplication_rate_plot_r", + "description" : "R script for generating duplication rate plot", + "default" : [ + "$id.duplication_rate_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_duplication_rate_plot", + "description" : "duplication rate plot (pdf)", + "default" : [ + "$id.duplication_rate_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_duplication_rate_mapping", + "description" : "Summary of mapping-based read duplication", + "default" : [ + "$id.duplication_rate_mapping.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_duplication_rate_sequence", + "description" : "Summary of sequencing-based read duplication", + "default" : [ + "$id.duplication_rate_sequencing.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Calculate read duplication rate.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/rseqc/readduplication/main.nf" + ] + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "python3-pip", + "r-base" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "RSeQC" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/rseqc_readduplication/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_readduplication", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_COUNT_UPPER_LIMIT+x} ]; then echo "${VIASH_PAR_READ_COUNT_UPPER_LIMIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_count_upper_limit='&'#" ; else echo "# par_read_count_upper_limit="; fi ) +$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT_R}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_duplication_rate_plot_r='&'#" ; else echo "# par_output_duplication_rate_plot_r="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_PLOT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_duplication_rate_plot='&'#" ; else echo "# par_output_duplication_rate_plot="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_MAPPING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_duplication_rate_mapping='&'#" ; else echo "# par_output_duplication_rate_mapping="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE+x} ]; then echo "${VIASH_PAR_OUTPUT_DUPLICATION_RATE_SEQUENCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_duplication_rate_sequence='&'#" ; else echo "# par_output_duplication_rate_sequence="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +prefix=\\$(openssl rand -hex 8) + +read_duplication.py \\\\ + -i \\$par_input \\\\ + -o \\$prefix \\\\ + -u \\$par_read_count_upper_limit \\\\ + -q \\$par_map_qual + +[[ -f "\\$prefix.DupRate_plot.pdf" ]] && mv \\$prefix.DupRate_plot.pdf \\$par_output_duplication_rate_plot +[[ -f "\\$prefix.DupRate_plot.r" ]] && mv \\$prefix.DupRate_plot.r \\$par_output_duplication_rate_plot_r +[[ -f "\\$prefix.pos.DupRate.xls" ]] && mv \\$prefix.pos.DupRate.xls \\$par_output_duplication_rate_mapping +[[ -f "\\$prefix.seq.DupRate.xls" ]] && mv \\$prefix.seq.DupRate.xls \\$par_output_duplication_rate_sequence +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/rseqc/rseqc_readduplication", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/rseqc/rseqc_readduplication/nextflow.config b/target/nextflow/rseqc/rseqc_readduplication/nextflow.config new file mode 100644 index 0000000..a882a24 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readduplication/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'rseqc/rseqc_readduplication' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Calculate read duplication rate.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/rseqc/rseqc_readduplication/nextflow_labels.config b/target/nextflow/rseqc/rseqc_readduplication/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readduplication/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json b/target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json new file mode 100644 index 0000000..3217c51 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json @@ -0,0 +1,150 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_readduplication", +"description": "Calculate read duplication rate.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. input alignment file in BAM or SAM format" + + } + + + , + "read_count_upper_limit": { + "type": + "integer", + "description": "Type: `integer`, default: `500`. Upper limit of reads\u0027 occurence", + "help_text": "Type: `integer`, default: `500`. Upper limit of reads\u0027 occurence. Only used for plotting, default = 500 (times)." + , + "default":500 + } + + + , + "map_qual": { + "type": + "integer", + "description": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", + "help_text": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." + , + "default":30 + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_duplication_rate_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot" + , + "default":"$id.duplication_rate_plot.r" + } + + + , + "output_duplication_rate_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)" + , + "default":"$id.duplication_rate_plot.pdf" + } + + + , + "output_duplication_rate_mapping": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication" + , + "default":"$id.duplication_rate_mapping.xls" + } + + + , + "output_duplication_rate_sequence": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication" + , + "default":"$id.duplication_rate_sequencing.xls" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml new file mode 100644 index 0000000..dba85ed --- /dev/null +++ b/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml @@ -0,0 +1,254 @@ +name: "rseqc_tin" +namespace: "rseqc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--bam_input" + description: "Path to input alignment file in BAM or SAM format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai_input" + description: "Path to bam index file in bai format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refgene" + description: "BED file containing the reference gene model" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--minimum_coverage" + description: "Minimum number of reads mapped to a transcript, default = 10." + info: null + default: + - 10 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sample_size" + description: "Number of equal-spaced nucleotide positions picked from mRNA. Note,\ + \ if this number is larger than the length of mRNA (L), it will be halved until\ + \ it's smaller than L (default = 100)" + info: null + default: + - 100 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--subtract_background" + description: "Set flag to subtract background noise (estimated from intronic reads).\ + \ Only use this option if there are substantial intronic reads." + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--output_tin_summary" + description: "summary statistics (txt) of calculated TIN metrics" + info: null + default: + - "$id.tin_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_tin" + description: "file with TIN metrics (xls)" + info: null + default: + - "$id.tin.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Calculte TIN (transcript integrity number) from RNA-seq reads\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.paired_end.sorted.bam" +- type: "file" + path: "test.paired_end.sorted.bam.bai" +- type: "file" + path: "test.bed12" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/rseqc/tin/main.nf" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "python3-pip" + interactive: false + - type: "docker" + run: + - "pip3 install RSeQC\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rseqc/tin/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rseqc/rseqc_tin" + executable: "target/nextflow/rseqc/rseqc_tin/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/rseqc/rseqc_tin/main.nf b/target/nextflow/rseqc/rseqc_tin/main.nf new file mode 100644 index 0000000..98fe172 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_tin/main.nf @@ -0,0 +1,3928 @@ +// rseqc_tin v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rseqc_tin", + "namespace" : "rseqc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--bam_input", + "description" : "Path to input alignment file in BAM or SAM format.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bai_input", + "description" : "Path to bam index file in bai format.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--refgene", + "description" : "BED file containing the reference gene model", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--minimum_coverage", + "description" : "Minimum number of reads mapped to a transcript, default = 10.", + "default" : [ + 10 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sample_size", + "description" : "Number of equal-spaced nucleotide positions picked from mRNA. Note, if this number is larger than the length of mRNA (L), it will be halved until it's smaller than L (default = 100)", + "default" : [ + 100 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--subtract_background", + "description" : "Set flag to subtract background noise (estimated from intronic reads). Only use this option if there are substantial intronic reads.", + "direction" : "input" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_tin_summary", + "description" : "summary statistics (txt) of calculated TIN metrics", + "default" : [ + "$id.tin_summary.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_tin", + "description" : "file with TIN metrics (xls)", + "default" : [ + "$id.tin.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Calculte TIN (transcript integrity number) from RNA-seq reads\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam.bai" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.bed12" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/rseqc/tin/main.nf" + ] + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "python3-pip" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "pip3 install RSeQC\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rseqc/tin/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rseqc/rseqc_tin", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BAM_INPUT+x} ]; then echo "${VIASH_PAR_BAM_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_input='&'#" ; else echo "# par_bam_input="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI_INPUT+x} ]; then echo "${VIASH_PAR_BAI_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai_input='&'#" ; else echo "# par_bai_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) +$( if [ ! -z ${VIASH_PAR_MINIMUM_COVERAGE+x} ]; then echo "${VIASH_PAR_MINIMUM_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minimum_coverage='&'#" ; else echo "# par_minimum_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBTRACT_BACKGROUND+x} ]; then echo "${VIASH_PAR_SUBTRACT_BACKGROUND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subtract_background='&'#" ; else echo "# par_subtract_background="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_TIN_SUMMARY+x} ]; then echo "${VIASH_PAR_OUTPUT_TIN_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_tin_summary='&'#" ; else echo "# par_output_tin_summary="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_TIN+x} ]; then echo "${VIASH_PAR_OUTPUT_TIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_tin='&'#" ; else echo "# par_output_tin="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +bam_file="\\$(basename -- \\$par_bam_input)" +bai_file="\\$(basename -- \\$par_bai_input)" + +echo "\\$bam_file" +echo "\\$bai_file" + +tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} + +cp \\$par_bam_input \\$tmpdir/\\$bam_file +cp \\$par_bai_input \\$tmpdir/\\$bai_file + +tin.py \\\\ + -i \\$tmpdir/\\$bam_file \\\\ + -r \\$par_refgene \\\\ + -c \\$par_minimum_coverage \\\\ + -n \\$par_sample_size \\\\ + -s \\$par_subtract_background + +[[ -f "\\${bam_file%.*}.summary.txt" ]] && mv \\${bam_file%.*}.summary.txt \\$par_output_tin_summary +[[ -f "\\${bam_file%.*}.tin.xls" ]] && mv \\${bam_file%.*}.tin.xls \\$par_output_tin + +clean_up +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/rseqc/rseqc_tin", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/rseqc/rseqc_tin/nextflow.config b/target/nextflow/rseqc/rseqc_tin/nextflow.config new file mode 100644 index 0000000..d4083c5 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_tin/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'rseqc/rseqc_tin' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Calculte TIN (transcript integrity number) from RNA-seq reads\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/rseqc/rseqc_tin/nextflow_labels.config b/target/nextflow/rseqc/rseqc_tin/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/rseqc/rseqc_tin/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/rseqc/rseqc_tin/nextflow_schema.json b/target/nextflow/rseqc/rseqc_tin/nextflow_schema.json new file mode 100644 index 0000000..becf8ff --- /dev/null +++ b/target/nextflow/rseqc/rseqc_tin/nextflow_schema.json @@ -0,0 +1,159 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rseqc_tin", +"description": "Calculte TIN (transcript integrity number) from RNA-seq reads\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "bam_input": { + "type": + "string", + "description": "Type: `file`, required. Path to input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. Path to input alignment file in BAM or SAM format." + + } + + + , + "bai_input": { + "type": + "string", + "description": "Type: `file`, required. Path to bam index file in bai format", + "help_text": "Type: `file`, required. Path to bam index file in bai format." + + } + + + , + "refgene": { + "type": + "string", + "description": "Type: `file`, required. BED file containing the reference gene model", + "help_text": "Type: `file`, required. BED file containing the reference gene model" + + } + + + , + "minimum_coverage": { + "type": + "integer", + "description": "Type: `integer`, default: `10`. Minimum number of reads mapped to a transcript, default = 10", + "help_text": "Type: `integer`, default: `10`. Minimum number of reads mapped to a transcript, default = 10." + , + "default":10 + } + + + , + "sample_size": { + "type": + "integer", + "description": "Type: `integer`, default: `100`. Number of equal-spaced nucleotide positions picked from mRNA", + "help_text": "Type: `integer`, default: `100`. Number of equal-spaced nucleotide positions picked from mRNA. Note, if this number is larger than the length of mRNA (L), it will be halved until it\u0027s smaller than L (default = 100)" + , + "default":100 + } + + + , + "subtract_background": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Set flag to subtract background noise (estimated from intronic reads)", + "help_text": "Type: `boolean_true`, default: `false`. Set flag to subtract background noise (estimated from intronic reads). Only use this option if there are substantial intronic reads." + , + "default":false + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_tin_summary": { + "type": + "string", + "description": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics", + "help_text": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics" + , + "default":"$id.tin_summary.txt" + } + + + , + "output_tin": { + "type": + "string", + "description": "Type: `file`, default: `$id.tin.xls`. file with TIN metrics (xls)", + "help_text": "Type: `file`, default: `$id.tin.xls`. file with TIN metrics (xls)" + , + "default":"$id.tin.xls" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/sortmerna/.config.vsh.yaml b/target/nextflow/sortmerna/.config.vsh.yaml new file mode 100644 index 0000000..a683d43 --- /dev/null +++ b/target/nextflow/sortmerna/.config.vsh.yaml @@ -0,0 +1,232 @@ +name: "sortmerna" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "boolean" + name: "--paired" + description: "Are the reads single-end or paired-end" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + description: "Input fastq" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--sortmerna_log" + description: "Sortmerna log file." + info: null + default: + - "$id.sortmerna.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Output file for read 1." + info: null + default: + - "$id.$key.read_1.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Output file for read 2." + info: null + default: + - "$id.$key.read_2.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Local sequence alignment tool for filtering, mapping and clustering.\ + \ The main application of SortMeRNA is filtering rRNA from metatranscriptomic data.\ + \ SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and\ + \ one or multiple rRNA database file(s), and sorts apart aligned and rejected reads\ + \ into two files.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "SRR6357070_1.fastq.gz" +- type: "file" + path: "SRR6357070_2.fastq.gz" +- type: "file" + path: "rRNA" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/sortmerna/main.nf" + - "modules/nf-core/sortmerna/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/sortmerna/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/sortmerna" + executable: "target/nextflow/sortmerna/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/sortmerna/main.nf b/target/nextflow/sortmerna/main.nf new file mode 100644 index 0000000..a02f354 --- /dev/null +++ b/target/nextflow/sortmerna/main.nf @@ -0,0 +1,3904 @@ +// sortmerna v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "sortmerna", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "boolean", + "name" : "--paired", + "description" : "Are the reads single-end or paired-end", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Input fastq", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--ribo_database_manifest", + "description" : "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--sortmerna_log", + "description" : "Sortmerna log file.", + "default" : [ + "$id.sortmerna.log" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_1", + "description" : "Output file for read 1.", + "default" : [ + "$id.$key.read_1.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_2", + "description" : "Output file for read 2.", + "default" : [ + "$id.$key.read_2.fastq.gz" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Local sequence alignment tool for filtering, mapping and clustering. The main application of SortMeRNA is filtering rRNA from metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart aligned and rejected reads into two files.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz" + }, + { + "type" : "file", + "path" : "/testData/minimal_test/reference/rRNA" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/sortmerna/main.nf", + "modules/nf-core/sortmerna/meta.yml" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/sortmerna/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/sortmerna", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_RIBO_DATABASE_MANIFEST+x} ]; then echo "${VIASH_PAR_RIBO_DATABASE_MANIFEST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ribo_database_manifest='&'#" ; else echo "# par_ribo_database_manifest="; fi ) +$( if [ ! -z ${VIASH_PAR_SORTMERNA_LOG+x} ]; then echo "${VIASH_PAR_SORTMERNA_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sortmerna_log='&'#" ; else echo "# par_sortmerna_log="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +IFS="," read -ra input <<< "\\$par_input" +IFS=";" read -ra paths <<< "\\$par_ribo_database_manifest" +refs="" +for i in "\\${paths[@]}" +do + refs+="-ref \\$i " +done + +if [ "\\$par_paired" == "false" ]; then + sortmerna \\\\ + \\$refs \\\\ + -reads \\${input[0]} \\\\ + --threads \\${meta_cpus:-1} \\\\ + --workdir . \\\\ + --aligned rRNA_reads \\\\ + --fastx \\\\ + -num_alignments 1 \\\\ + --other non_rRNA_reads + mv non_rRNA_reads.f*q.gz "\\$par_fastq_1" +else + sortmerna \\\\ + \\$refs \\\\ + -reads \\${input[0]} \\\\ + --reads \\${input[1]} \\\\ + --threads \\${meta_cpus:-1} \\\\ + --workdir . \\\\ + --aligned rRNA_reads \\\\ + --fastx \\\\ + --num_alignments 1 \\\\ + --other non_rRNA_reads \\\\ + --paired_in \\\\ + --out2 + mv non_rRNA_reads_fwd.f*q.gz \\$par_fastq_1 + mv non_rRNA_reads_rev.f*q.gz \\$par_fastq_2 +fi + +mv rRNA_reads.log \\$par_sortmerna_log +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/sortmerna", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/sortmerna/nextflow.config b/target/nextflow/sortmerna/nextflow.config new file mode 100644 index 0000000..91e15ea --- /dev/null +++ b/target/nextflow/sortmerna/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'sortmerna' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Local sequence alignment tool for filtering, mapping and clustering. The main application of SortMeRNA is filtering rRNA from metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart aligned and rejected reads into two files.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/sortmerna/nextflow_labels.config b/target/nextflow/sortmerna/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/sortmerna/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/sortmerna/nextflow_schema.json b/target/nextflow/sortmerna/nextflow_schema.json new file mode 100644 index 0000000..4f376dc --- /dev/null +++ b/target/nextflow/sortmerna/nextflow_schema.json @@ -0,0 +1,137 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "sortmerna", +"description": "Local sequence alignment tool for filtering, mapping and clustering. The main application of SortMeRNA is filtering rRNA from metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart aligned and rejected reads into two files.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "paired": { + "type": + "boolean", + "description": "Type: `boolean`. Are the reads single-end or paired-end", + "help_text": "Type: `boolean`. Are the reads single-end or paired-end" + + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. Input fastq", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. Input fastq" + + } + + + , + "ribo_database_manifest": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "sortmerna_log": { + "type": + "string", + "description": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file", + "help_text": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file." + , + "default":"$id.sortmerna.log" + } + + + , + "fastq_1": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.read_1.fastq.gz`. Output file for read 1", + "help_text": "Type: `file`, required, default: `$id.$key.read_1.fastq.gz`. Output file for read 1." + , + "default":"$id.$key.read_1.fastq.gz" + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.read_2.fastq.gz`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.read_2.fastq.gz`. Output file for read 2." + , + "default":"$id.$key.read_2.fastq.gz" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/stringtie/.config.vsh.yaml b/target/nextflow/stringtie/.config.vsh.yaml new file mode 100644 index 0000000..e476253 --- /dev/null +++ b/target/nextflow/stringtie/.config.vsh.yaml @@ -0,0 +1,256 @@ +name: "stringtie" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--strandedness" + description: "Forward or reverse strand?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--annotation_gtf" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_stringtie_args" + description: "Extra arguments for running StringTie" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--stringtie_ignore_gtf" + description: "Perform reference-guided de novo assembly of transcripts using StringTie,\ + \ i.e. don't restrict to those in GTF file." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--transcript_gtf" + info: null + default: + - "$id.$key.transcripts.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--coverage_gtf" + info: null + default: + - "$id.$key.coverage.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--abundance" + info: null + default: + - "$id.$key.abundance.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--ballgown" + description: "for running ballgown" + info: null + default: + - "$id.$key.ballgown" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Transcript assembly and quantification for RNA-Seq\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "genes.gtf" +- type: "file" + path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/stringtie/stringtie/main.nf" + - "modules/nf-core/stringtie/stringtie/meta.yml" + last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential zlib1g wget && \\\n\ + wget --no-check-certificate https://github.com/gpertea/stringtie/releases/download/v2.2.1/stringtie-2.2.1.Linux_x86_64.tar.gz\ + \ && \\\ntar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \\\ncp stringtie-2.2.1.Linux_x86_64/stringtie\ + \ /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/stringtie/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/stringtie" + executable: "target/nextflow/stringtie/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/stringtie/main.nf b/target/nextflow/stringtie/main.nf new file mode 100644 index 0000000..9791998 --- /dev/null +++ b/target/nextflow/stringtie/main.nf @@ -0,0 +1,3922 @@ +// stringtie v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "stringtie", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--strandedness", + "description" : "Forward or reverse strand?", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--annotation_gtf", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_stringtie_args", + "description" : "Extra arguments for running StringTie", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--stringtie_ignore_gtf", + "description" : "Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--transcript_gtf", + "default" : [ + "$id.$key.transcripts.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--coverage_gtf", + "default" : [ + "$id.$key.coverage.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--abundance", + "default" : [ + "$id.$key.abundance.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ballgown", + "description" : "for running ballgown", + "default" : [ + "$id.$key.ballgown" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Transcript assembly and quantification for RNA-Seq\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/genes.gtf" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/stringtie/stringtie/main.nf", + "modules/nf-core/stringtie/stringtie/meta.yml" + ], + "last_sha" : "55398de6ab7577acfe9b1180016a93d7af7eb859" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y build-essential zlib1g wget && \\\\\nwget --no-check-certificate https://github.com/gpertea/stringtie/releases/download/v2.2.1/stringtie-2.2.1.Linux_x86_64.tar.gz && \\\\\ntar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \\\\\ncp stringtie-2.2.1.Linux_x86_64/stringtie /usr/local/bin/\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/stringtie/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/stringtie", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_ANNOTATION_GTF+x} ]; then echo "${VIASH_PAR_ANNOTATION_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_annotation_gtf='&'#" ; else echo "# par_annotation_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_STRINGTIE_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_STRINGTIE_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_stringtie_args='&'#" ; else echo "# par_extra_stringtie_args="; fi ) +$( if [ ! -z ${VIASH_PAR_STRINGTIE_IGNORE_GTF+x} ]; then echo "${VIASH_PAR_STRINGTIE_IGNORE_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stringtie_ignore_gtf='&'#" ; else echo "# par_stringtie_ignore_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPT_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPT_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcript_gtf='&'#" ; else echo "# par_transcript_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_COVERAGE_GTF+x} ]; then echo "${VIASH_PAR_COVERAGE_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_coverage_gtf='&'#" ; else echo "# par_coverage_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_ABUNDANCE+x} ]; then echo "${VIASH_PAR_ABUNDANCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_abundance='&'#" ; else echo "# par_abundance="; fi ) +$( if [ ! -z ${VIASH_PAR_BALLGOWN+x} ]; then echo "${VIASH_PAR_BALLGOWN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ballgown='&'#" ; else echo "# par_ballgown="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +if [ \\$par_strandedness=='forward' ]; then + strand='--fr' +elif [ \\$par_strandedness=='reverse' ]; then + strand='--rf' +fi + +stringtie \\\\ + \\$par_bam \\\\ + \\$strand \\\\ + \\${par_annotation_gtf:+-G \\$par_annotation_gtf} \\\\ + -o \\$par_transcript_gtf \\\\ + -A "abundance.txt" \\\\ + \\${par_annotation_gtf:+-C "coverage.gtf"} \\\\ + \\${par_annotation_gtf:+-b "ballgown"} \\\\ + -p \\${meta_cpus:-1} \\\\ + \\$par_extra_stringtie_args \\\\ + \\${par_stringtie_ignore_gtf:+-e} + +mv coverage.gtf \\$par_coverage_gtf +mv ballgown \\$par_ballgown +mv abundance.txt \\$par_abundance +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/stringtie", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/stringtie/nextflow.config b/target/nextflow/stringtie/nextflow.config new file mode 100644 index 0000000..1206065 --- /dev/null +++ b/target/nextflow/stringtie/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'stringtie' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Transcript assembly and quantification for RNA-Seq\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/stringtie/nextflow_labels.config b/target/nextflow/stringtie/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/stringtie/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/stringtie/nextflow_schema.json b/target/nextflow/stringtie/nextflow_schema.json new file mode 100644 index 0000000..484111a --- /dev/null +++ b/target/nextflow/stringtie/nextflow_schema.json @@ -0,0 +1,168 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "stringtie", +"description": "Transcript assembly and quantification for RNA-Seq\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "strandedness": { + "type": + "string", + "description": "Type: `string`. Forward or reverse strand?", + "help_text": "Type: `string`. Forward or reverse strand?" + + } + + + , + "bam": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "annotation_gtf": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "extra_stringtie_args": { + "type": + "string", + "description": "Type: `string`. Extra arguments for running StringTie", + "help_text": "Type: `string`. Extra arguments for running StringTie" + + } + + + , + "stringtie_ignore_gtf": { + "type": + "boolean", + "description": "Type: `boolean`. Perform reference-guided de novo assembly of transcripts using StringTie, i", + "help_text": "Type: `boolean`. Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don\u0027t restrict to those in GTF file." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "transcript_gtf": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.transcripts.gtf`. ", + "help_text": "Type: `file`, default: `$id.$key.transcripts.gtf`. " + , + "default":"$id.$key.transcripts.gtf" + } + + + , + "coverage_gtf": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.coverage.gtf`. ", + "help_text": "Type: `file`, default: `$id.$key.coverage.gtf`. " + , + "default":"$id.$key.coverage.gtf" + } + + + , + "abundance": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.abundance.txt`. ", + "help_text": "Type: `file`, default: `$id.$key.abundance.txt`. " + , + "default":"$id.$key.abundance.txt" + } + + + , + "ballgown": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.ballgown`. for running ballgown", + "help_text": "Type: `file`, default: `$id.$key.ballgown`. for running ballgown" + , + "default":"$id.$key.ballgown" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/summarizedexperiment/.config.vsh.yaml b/target/nextflow/summarizedexperiment/.config.vsh.yaml new file mode 100644 index 0000000..72dde7b --- /dev/null +++ b/target/nextflow/summarizedexperiment/.config.vsh.yaml @@ -0,0 +1,235 @@ +name: "summarizedexperiment" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--tpm_gene" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tx2gene_tsv" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "summarizedexperiment.r" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Create SummarizedExperiment object from Salmon counts" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/summarizedexperiment/main.nf" + last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "rocker/r2u:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "r" + bioc: + - "SummarizedExperiment" + - "tximeta" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/summarizedexperiment/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/summarizedexperiment" + executable: "target/nextflow/summarizedexperiment/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/summarizedexperiment/main.nf b/target/nextflow/summarizedexperiment/main.nf new file mode 100644 index 0000000..726000a --- /dev/null +++ b/target/nextflow/summarizedexperiment/main.nf @@ -0,0 +1,3888 @@ +// summarizedexperiment v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "summarizedexperiment", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--tpm_gene", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_length_scaled", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_scaled", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tpm_transcript", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcript", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tx2gene_tsv", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "default" : [ + "merged_summarizedexperiment" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "summarizedexperiment.r" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Create SummarizedExperiment object from Salmon counts", + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/summarizedexperiment/main.nf" + ], + "last_sha" : "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "rocker/r2u:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "r", + "bioc" : [ + "SummarizedExperiment", + "tximeta" + ], + "bioc_force_install" : false, + "warnings_as_errors" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/summarizedexperiment/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/summarizedexperiment", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_TPM_GENE+x} ]; then echo "${VIASH_PAR_TPM_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tpm_gene='&'#" ; else echo "# par_tpm_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene_length_scaled='&'#" ; else echo "# par_counts_gene_length_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_SCALED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene_scaled='&'#" ; else echo "# par_counts_gene_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_TPM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_TPM_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tpm_transcript='&'#" ; else echo "# par_tpm_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcript='&'#" ; else echo "# par_counts_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_TX2GENE_TSV+x} ]; then echo "${VIASH_PAR_TX2GENE_TSV}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tx2gene_tsv='&'#" ; else echo "# par_tx2gene_tsv="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \\$par_output + +Rscript "\\$meta_resources_dir/summarizedexperiment.r" NULL \\$par_counts_gene \\$par_tpm_gene \\$par_tx2gene_tsv + +Rscript "\\$meta_resources_dir/summarizedexperiment.r" NULL \\$par_counts_gene_length_scaled \\$par_tpm_gene \\$par_tx2gene_tsv + +Rscript "\\$meta_resources_dir/summarizedexperiment.r" NULL \\$par_counts_gene_scaled \\$par_tpm_gene \\$par_tx2gene_tsv + +Rscript "\\$meta_resources_dir/summarizedexperiment.r" NULL \\$par_counts_transcript \\$par_tpm_transcript \\$par_tx2gene_tsv + +mv \\${par_counts_gene%.*}.rds \\$par_output/ +mv \\${par_counts_gene_length_scaled%.*}.rds \\$par_output/ +mv \\${par_counts_gene_scaled%.*}.rds \\$par_output/ +mv \\${par_counts_transcript%.*}.rds \\$par_output/ +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/summarizedexperiment", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/summarizedexperiment/nextflow.config b/target/nextflow/summarizedexperiment/nextflow.config new file mode 100644 index 0000000..42ef78c --- /dev/null +++ b/target/nextflow/summarizedexperiment/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'summarizedexperiment' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Create SummarizedExperiment object from Salmon counts' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/summarizedexperiment/nextflow_labels.config b/target/nextflow/summarizedexperiment/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/summarizedexperiment/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/summarizedexperiment/nextflow_schema.json b/target/nextflow/summarizedexperiment/nextflow_schema.json new file mode 100644 index 0000000..c185c97 --- /dev/null +++ b/target/nextflow/summarizedexperiment/nextflow_schema.json @@ -0,0 +1,155 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "summarizedexperiment", +"description": "Create SummarizedExperiment object from Salmon counts", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "tpm_gene": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "counts_gene": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "counts_gene_length_scaled": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "counts_gene_scaled": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "tpm_transcript": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "counts_transcript": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "tx2gene_tsv": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `merged_summarizedexperiment`. " + , + "default":"merged_summarizedexperiment" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/summarizedexperiment/summarizedexperiment.r b/target/nextflow/summarizedexperiment/summarizedexperiment.r new file mode 100755 index 0000000..5d656a7 --- /dev/null +++ b/target/nextflow/summarizedexperiment/summarizedexperiment.r @@ -0,0 +1,68 @@ +#!/usr/bin/env Rscript + +library(SummarizedExperiment) + +## Create SummarizedExperiment (se) object from Salmon counts + +args <- commandArgs(trailingOnly = TRUE) +if (length(args) < 2) { + stop("Usage: salmon_se.r ", call. = FALSE) +} + +coldata <- args[1] +counts_fn <- args[2] +tpm_fn <- args[3] +tx2gene <- args[4] + +info <- file.info(tx2gene) +if (info$size == 0) { + tx2gene <- NULL +} else { + rowdata <- read.csv(tx2gene, sep = "\t", header = FALSE) + colnames(rowdata) <- c("tx", "gene_id", "gene_name") + tx2gene <- rowdata[, 1:2] +} + +counts <- read.csv(counts_fn, row.names = 1, sep = "\t") +counts <- counts[, 2:ncol(counts), drop = FALSE] # remove gene_name column +tpm <- read.csv(tpm_fn, row.names = 1, sep = "\t") +tpm <- tpm[, 2:ncol(tpm), drop = FALSE] # remove gene_name column + +if (length(intersect(rownames(counts), rowdata[["tx"]])) > length(intersect(rownames(counts), rowdata[["gene_id"]]))) { + by_what <- "tx" +} else { + by_what <- "gene_id" + rowdata <- unique(rowdata[, 2:3]) +} + +if (file.exists(coldata)) { + coldata <- read.csv(coldata, sep = "\t") + coldata <- coldata[match(colnames(counts), coldata[, 1]), ] + coldata <- cbind(files = fns, coldata) +} else { + message("ColData not avaliable ", coldata) + coldata <- data.frame(files = colnames(counts), names = colnames(counts)) +} + +rownames(coldata) <- coldata[["names"]] +extra <- setdiff(rownames(counts), as.character(rowdata[[by_what]])) +if (length(extra) > 0) { + rowdata <- rbind( + rowdata, + data.frame( + tx = extra, + gene_id = extra, + gene_name = extra + )[, colnames(rowdata)] + ) +} + +rowdata <- rowdata[match(rownames(counts), as.character(rowdata[[by_what]])), ] +rownames(rowdata) <- rowdata[[by_what]] +se <- SummarizedExperiment( + assays = list(counts = counts, abundance = tpm), + colData = DataFrame(coldata), + rowData = rowdata +) + +saveRDS(se, file = paste0(tools::file_path_sans_ext(counts_fn), ".rds")) diff --git a/target/nextflow/tx2gene/.config.vsh.yaml b/target/nextflow/tx2gene/.config.vsh.yaml new file mode 100644 index 0000000..be39559 --- /dev/null +++ b/target/nextflow/tx2gene/.config.vsh.yaml @@ -0,0 +1,225 @@ +name: "tx2gene" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--gtf" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quant_type" + description: "Method used for quantification" + info: null + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--tsv" + info: null + default: + - "tx2gene.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "tx2gene.py" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Get transcript id (tx) to gene names for tximport" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/tx2gene/main.nf" + last_sha: "839ac5cab892504514cc96d44e99e70516b239d2" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "pip" + - "unzip" + interactive: false + - type: "python" + user: false + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/tx2gene/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/tx2gene" + executable: "target/nextflow/tx2gene/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/tx2gene/main.nf b/target/nextflow/tx2gene/main.nf new file mode 100644 index 0000000..d5a6399 --- /dev/null +++ b/target/nextflow/tx2gene/main.nf @@ -0,0 +1,3888 @@ +// tx2gene v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "tx2gene", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--quant_results", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_extra_attributes", + "default" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_group_features", + "default" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quant_type", + "description" : "Method used for quantification", + "required" : false, + "choices" : [ + "salmon", + "kallisto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--tsv", + "default" : [ + "tx2gene.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "tx2gene.py" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Get transcript id (tx) to gene names for tximport", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/tx2gene/main.nf" + ], + "last_sha" : "839ac5cab892504514cc96d44e99e70516b239d2" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "pip", + "unzip" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/tx2gene/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/tx2gene", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_results='&'#" ; else echo "# par_quant_results="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF_EXTRA_ATTRIBUTES+x} ]; then echo "${VIASH_PAR_GTF_EXTRA_ATTRIBUTES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf_extra_attributes='&'#" ; else echo "# par_gtf_extra_attributes="; fi ) +$( if [ ! -z ${VIASH_PAR_GTF_GROUP_FEATURES+x} ]; then echo "${VIASH_PAR_GTF_GROUP_FEATURES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf_group_features='&'#" ; else echo "# par_gtf_group_features="; fi ) +$( if [ ! -z ${VIASH_PAR_QUANT_TYPE+x} ]; then echo "${VIASH_PAR_QUANT_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_type='&'#" ; else echo "# par_quant_type="; fi ) +$( if [ ! -z ${VIASH_PAR_TSV+x} ]; then echo "${VIASH_PAR_TSV}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tsv='&'#" ; else echo "# par_tsv="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXXXX") + +IFS=";" read -ra results <<< \\$par_quant_results +for result in \\${results[*]} +do + cp -r \\$result \\$tmpdir +done + +python3 "\\$meta_resources_dir/tx2gene.py" \\\\ + --quant_type \\$par_quant_type \\\\ + --gtf \\$par_gtf \\\\ + --quants \\$tmpdir \\\\ + --id \\$par_gtf_group_features \\\\ + --extra \\$par_gtf_extra_attributes \\\\ + -o \\$par_tsv +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/tx2gene", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/tx2gene/nextflow.config b/target/nextflow/tx2gene/nextflow.config new file mode 100644 index 0000000..84525dd --- /dev/null +++ b/target/nextflow/tx2gene/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'tx2gene' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Get transcript id (tx) to gene names for tximport' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/tx2gene/nextflow_labels.config b/target/nextflow/tx2gene/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/tx2gene/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/tx2gene/nextflow_schema.json b/target/nextflow/tx2gene/nextflow_schema.json new file mode 100644 index 0000000..744cda0 --- /dev/null +++ b/target/nextflow/tx2gene/nextflow_schema.json @@ -0,0 +1,139 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "tx2gene", +"description": "Get transcript id (tx) to gene names for tximport", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "quant_results": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. " + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "gtf_extra_attributes": { + "type": + "string", + "description": "Type: `string`, default: `gene_name`. ", + "help_text": "Type: `string`, default: `gene_name`. " + , + "default":"gene_name" + } + + + , + "gtf_group_features": { + "type": + "string", + "description": "Type: `string`, default: `gene_id`. ", + "help_text": "Type: `string`, default: `gene_id`. " + , + "default":"gene_id" + } + + + , + "quant_type": { + "type": + "string", + "description": "Type: `string`, choices: ``salmon`, `kallisto``. Method used for quantification", + "help_text": "Type: `string`, choices: ``salmon`, `kallisto``. Method used for quantification", + "enum": ["salmon", "kallisto"] + + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "tsv": { + "type": + "string", + "description": "Type: `file`, default: `tx2gene.tsv`. ", + "help_text": "Type: `file`, default: `tx2gene.tsv`. " + , + "default":"tx2gene.tsv" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/tx2gene/tx2gene.py b/target/nextflow/tx2gene/tx2gene.py new file mode 100755 index 0000000..dd50079 --- /dev/null +++ b/target/nextflow/tx2gene/tx2gene.py @@ -0,0 +1,169 @@ + +#!/usr/bin/env python + +# Written by Lorena Pantano with subsequent reworking by Jonathan Manning. Released under the MIT license. + +import logging +import argparse +import glob +import os +import re +from collections import Counter, defaultdict, OrderedDict +from collections.abc import Set +from typing import Dict + +# Configure logging +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]: + """ + Read the top 100 transcripts from the quantification file. + + Parameters: + quant_dir (str): Directory where quantification files are located. + file_pattern (str): Pattern to match quantification files. + + Returns: + set: A set containing the top 100 transcripts. + """ + try: + # Find the quantification file within the directory + quant_file_path = glob.glob(os.path.join(quant_dir, file_pattern))[0] + with open(quant_file_path, "r") as file_handle: + # Read the file and extract the top 100 transcripts + return {line.split()[0] for i, line in enumerate(file_handle) if i > 0 and i <= 100} + except IndexError: + # Log an error and raise a FileNotFoundError if the quant file does not exist + logger.error("No quantification files found.") + raise FileNotFoundError("Quantification file not found.") + + +def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str: + """ + Discover the attribute in the GTF that corresponds to transcripts, prioritizing 'transcript_id'. + + Parameters: + gtf_file (str): Path to the GTF file. + transcripts (Set[str]): A set of transcripts to match in the GTF file. + + Returns: + str: The attribute name that corresponds to transcripts in the GTF file. + """ + + votes = Counter() + with open(gtf_file) as inh: + # Read GTF file, skipping header lines + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\t") + + # Use regular expression to correctly split the attributes string + attributes_str = cols[8] + attributes = dict(re.findall(r'(\S+) "(.*?)(? Dict[str, str]: + """ + Parse the attributes column of a GTF file. + + :param attributes_text: The attributes column as a string. + :return: A dictionary of the attributes. + """ + # Split the attributes string by semicolon and strip whitespace + attributes = attributes_text.strip().split(";") + attr_dict = OrderedDict() + + # Iterate over each attribute pair + for attribute in attributes: + # Split the attribute into key and value, ensuring there are two parts + parts = attribute.strip().split(" ", 1) + if len(parts) == 2: + key, value = parts + # Remove any double quotes from the value + value = value.replace('"', "") + attr_dict[key] = value + + return attr_dict + + +def map_transcripts_to_gene( + quant_type: str, gtf_file: str, quant_dir: str, gene_id: str, extra_id_field: str, output_file: str +) -> bool: + """ + Map transcripts to gene names and write the output to a file. + + Parameters: + quant_type (str): The quantification method used (e.g., 'salmon'). + gtf_file (str): Path to the GTF file. + quant_dir (str): Directory where quantification files are located. + gene_id (str): The gene ID attribute in the GTF file. + extra_id_field (str): Additional ID field in the GTF file. + output_file (str): The output file path. + + Returns: + bool: True if the operation was successful, False otherwise. + """ + # Read the top transcripts based on quantification type + transcripts = read_top_transcripts(quant_dir, "*quant_results.sf" if quant_type == "salmon" else "*abundance.tsv") + # Discover the attribute that corresponds to transcripts in the GTF + transcript_attribute = discover_transcript_attribute(gtf_file, transcripts) + + if not transcript_attribute: + # If no attribute is found, return False + return False + + # Open GTF and output file to write the mappings + # Initialize the set to track seen combinations + seen = set() + + with open(gtf_file) as inh, open(output_file, "w") as output_handle: + # Parse each line of the GTF, mapping transcripts to genes + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\t") + attr_dict = parse_attributes(cols[8]) + if gene_id in attr_dict and transcript_attribute in attr_dict: + # Create a unique identifier for the transcript-gene combination + transcript_gene_pair = (attr_dict[transcript_attribute], attr_dict[gene_id]) + + # Check if the combination has already been seen + if transcript_gene_pair not in seen: + # If it's a new combination, write it to the output and add to the seen set + extra_id = attr_dict.get(extra_id_field, attr_dict[gene_id]) + output_handle.write(f"{attr_dict[transcript_attribute]}\t{attr_dict[gene_id]}\t{extra_id}\n") + seen.add(transcript_gene_pair) + + return True + + +# Main function to parse arguments and call the mapping function +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Map transcripts to gene names for tximport.") + parser.add_argument("--quant_type", type=str, help="Quantification type", default="salmon") + parser.add_argument("--gtf", type=str, help="GTF file", required=True) + parser.add_argument("--quants", type=str, help="Output of quantification", required=True) + parser.add_argument("--id", type=str, help="Gene ID in the GTF file", required=True) + parser.add_argument("--extra", type=str, help="Extra ID in the GTF file") + parser.add_argument("-o", "--output", dest="output", default="tx2gene.tsv", type=str, help="File with output") + + args = parser.parse_args() + if not map_transcripts_to_gene(args.quant_type, args.gtf, args.quants, args.id, args.extra, args.output): + logger.error("Failed to map transcripts to genes.") diff --git a/target/nextflow/tximport/.config.vsh.yaml b/target/nextflow/tximport/.config.vsh.yaml new file mode 100644 index 0000000..5e4d120 --- /dev/null +++ b/target/nextflow/tximport/.config.vsh.yaml @@ -0,0 +1,292 @@ +name: "tximport" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--tx2gene_tsv" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quant_type" + description: "Method used for quantification" + info: null + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--tpm_gene" + info: null + default: + - "merged.gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + default: + - "merged.gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + default: + - "merged.gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + default: + - "merged.gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_gene" + info: null + default: + - "merged.gene_lengths.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + default: + - "merged.transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + default: + - "merged.transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_transcript" + info: null + default: + - "merged.transcript_lengths.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "tximport.r" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Get dataframe linking transcript ID, gene ID, and gene name" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/local/tximport/main.nf" + last_sha: "489bcb4efdc7bd58839b22b0360d26b4d80b87a8" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "r-base" + - "libcurl4-openssl-dev" + - "libssl-dev" + - "libxml2-dev" + interactive: false + - type: "r" + cran: + - "jsonlite" + bioc: + - "SummarizedExperiment" + - "tximport" + - "tximeta" + bioc_force_install: false + warnings_as_errors: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/tximport/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/tximport" + executable: "target/nextflow/tximport/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/tximport/main.nf b/target/nextflow/tximport/main.nf new file mode 100644 index 0000000..b3ad93a --- /dev/null +++ b/target/nextflow/tximport/main.nf @@ -0,0 +1,3971 @@ +// tximport v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "tximport", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--quant_results", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tx2gene_tsv", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quant_type", + "description" : "Method used for quantification", + "required" : false, + "choices" : [ + "salmon", + "kallisto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--tpm_gene", + "default" : [ + "merged.gene_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene", + "default" : [ + "merged.gene_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_length_scaled", + "default" : [ + "merged.gene_counts_length_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_scaled", + "default" : [ + "merged.gene_counts_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--lengths_gene", + "default" : [ + "merged.gene_lengths.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tpm_transcript", + "default" : [ + "merged.transcript_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcript", + "default" : [ + "merged.transcript_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--lengths_transcript", + "default" : [ + "merged.transcript_lengths.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "tximport.r" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Get dataframe linking transcript ID, gene ID, and gene name", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/local/tximport/main.nf" + ], + "last_sha" : "489bcb4efdc7bd58839b22b0360d26b4d80b87a8" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "r-base", + "libcurl4-openssl-dev", + "libssl-dev", + "libxml2-dev" + ], + "interactive" : false + }, + { + "type" : "r", + "cran" : [ + "jsonlite" + ], + "bioc" : [ + "SummarizedExperiment", + "tximport", + "tximeta" + ], + "bioc_force_install" : false, + "warnings_as_errors" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/tximport/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/tximport", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_results='&'#" ; else echo "# par_quant_results="; fi ) +$( if [ ! -z ${VIASH_PAR_TX2GENE_TSV+x} ]; then echo "${VIASH_PAR_TX2GENE_TSV}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tx2gene_tsv='&'#" ; else echo "# par_tx2gene_tsv="; fi ) +$( if [ ! -z ${VIASH_PAR_QUANT_TYPE+x} ]; then echo "${VIASH_PAR_QUANT_TYPE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_type='&'#" ; else echo "# par_quant_type="; fi ) +$( if [ ! -z ${VIASH_PAR_TPM_GENE+x} ]; then echo "${VIASH_PAR_TPM_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tpm_gene='&'#" ; else echo "# par_tpm_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_LENGTH_SCALED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene_length_scaled='&'#" ; else echo "# par_counts_gene_length_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE_SCALED+x} ]; then echo "${VIASH_PAR_COUNTS_GENE_SCALED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene_scaled='&'#" ; else echo "# par_counts_gene_scaled="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTHS_GENE+x} ]; then echo "${VIASH_PAR_LENGTHS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_lengths_gene='&'#" ; else echo "# par_lengths_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_TPM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_TPM_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tpm_transcript='&'#" ; else echo "# par_tpm_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcript='&'#" ; else echo "# par_counts_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTHS_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_LENGTHS_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_lengths_transcript='&'#" ; else echo "# par_lengths_transcript="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXXXX") + +IFS=";" read -ra results <<< \\$par_quant_results +for result in \\${results[*]} +do + cp -r \\$result \\$tmpdir +done + +Rscript "\\$meta_resources_dir/tximport.r" \\\\ + NULL \\\\ + \\$tmpdir \\\\ + merged \\\\ + \\$par_quant_type \\\\ + \\$par_tx2gene_tsv +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/tximport", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/tximport/nextflow.config b/target/nextflow/tximport/nextflow.config new file mode 100644 index 0000000..633760d --- /dev/null +++ b/target/nextflow/tximport/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'tximport' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Get dataframe linking transcript ID, gene ID, and gene name' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/tximport/nextflow_labels.config b/target/nextflow/tximport/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/tximport/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/tximport/nextflow_schema.json b/target/nextflow/tximport/nextflow_schema.json new file mode 100644 index 0000000..8c42a1d --- /dev/null +++ b/target/nextflow/tximport/nextflow_schema.json @@ -0,0 +1,194 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "tximport", +"description": "Get dataframe linking transcript ID, gene ID, and gene name", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "quant_results": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. " + + } + + + , + "tx2gene_tsv": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "quant_type": { + "type": + "string", + "description": "Type: `string`, choices: ``salmon`, `kallisto``. Method used for quantification", + "help_text": "Type: `string`, choices: ``salmon`, `kallisto``. Method used for quantification", + "enum": ["salmon", "kallisto"] + + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "tpm_gene": { + "type": + "string", + "description": "Type: `file`, default: `merged.gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_tpm.tsv`. " + , + "default":"merged.gene_tpm.tsv" + } + + + , + "counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `merged.gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_counts.tsv`. " + , + "default":"merged.gene_counts.tsv" + } + + + , + "counts_gene_length_scaled": { + "type": + "string", + "description": "Type: `file`, default: `merged.gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_counts_length_scaled.tsv`. " + , + "default":"merged.gene_counts_length_scaled.tsv" + } + + + , + "counts_gene_scaled": { + "type": + "string", + "description": "Type: `file`, default: `merged.gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_counts_scaled.tsv`. " + , + "default":"merged.gene_counts_scaled.tsv" + } + + + , + "lengths_gene": { + "type": + "string", + "description": "Type: `file`, default: `merged.gene_lengths.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_lengths.tsv`. " + , + "default":"merged.gene_lengths.tsv" + } + + + , + "tpm_transcript": { + "type": + "string", + "description": "Type: `file`, default: `merged.transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `merged.transcript_tpm.tsv`. " + , + "default":"merged.transcript_tpm.tsv" + } + + + , + "counts_transcript": { + "type": + "string", + "description": "Type: `file`, default: `merged.transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `merged.transcript_counts.tsv`. " + , + "default":"merged.transcript_counts.tsv" + } + + + , + "lengths_transcript": { + "type": + "string", + "description": "Type: `file`, default: `merged.transcript_lengths.tsv`. ", + "help_text": "Type: `file`, default: `merged.transcript_lengths.tsv`. " + , + "default":"merged.transcript_lengths.tsv" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/tximport/tximport.r b/target/nextflow/tximport/tximport.r new file mode 100755 index 0000000..5036399 --- /dev/null +++ b/target/nextflow/tximport/tximport.r @@ -0,0 +1,142 @@ +#!/usr/bin/env Rscript + +# Script for importing and processing transcript-level quantifications. +# Written by Lorena Pantano, later modified by Jonathan Manning, and released under the MIT license. + +# Loading required libraries +library(SummarizedExperiment) +library(tximport) + +# Parsing command line arguments +args <- commandArgs(trailingOnly=TRUE) +if (length(args) < 4) { + stop("Usage: tximport.r ", + call.=FALSE) +} + +# Assigning command line arguments to variables +coldata_path <- args[1] +path <- args[2] +prefix <- args[3] +quant_type <- args[4] +tx2gene_path <- args[5] + +## Functions + +# Build a table from a SummarizedExperiment object +build_table <- function(se.obj, slot) { + cbind(rowData(se.obj)[,1:2], assays(se.obj)[[slot]]) +} + +# Write a table to a file with given parameters +write_se_table <- function(params) { + file_name <- paste0(prefix, ".", params$suffix) + write.table(build_table(params$obj, params$slot), file_name, + sep="\t", quote=FALSE, row.names = FALSE) +} + +# Read transcript metadata from a given path +read_transcript_info <- function(tinfo_path){ + info <- file.info(tinfo_path) + if (info$size == 0) { + stop("tx2gene file is empty") + } + + transcript_info <- read.csv(tinfo_path, sep="\t", header = FALSE, + col.names = c("tx", "gene_id", "gene_name")) + + extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]])) + transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra)) + transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ] + rownames(transcript_info) <- transcript_info[["tx"]] + + list(transcript = transcript_info, + gene = unique(transcript_info[,2:3]), + tx2gene = transcript_info[,1:2]) +} + +# Read and process sample/column data from a given path +read_coldata <- function(coldata_path){ + if (file.exists(coldata_path)) { + coldata <- read.csv(coldata_path, sep="\t") + coldata <- coldata[match(names, coldata[,1]),] + coldata <- cbind(files = fns, coldata) + } else { + message("ColData not available: ", coldata_path) + coldata <- data.frame(files = fns, names = names) + } + rownames(coldata) <- coldata[["names"]] +} + +# Create a SummarizedExperiment object with given data +create_summarized_experiment <- function(counts, abundance, length, col_data, row_data) { + SummarizedExperiment(assays = list(counts = counts, abundance = abundance, length = length), + colData = col_data, + rowData = row_data) +} + +# Main script starts here + +# Define pattern for file names based on quantification type +pattern <- ifelse(quant_type == "kallisto", "abundance.tsv", ".*quant_results\\.sf") +fns <- list.files(path, pattern = pattern, recursive = T, full.names = T) +names <- basename(fns) +names(fns) <- names +dropInfReps <- quant_type == "kallisto" + +# Import transcript-level quantifications +txi <- tximport(fns, type = quant_type, txOut = TRUE, dropInfReps = dropInfReps) + +# Read transcript and sample data +transcript_info <- read_transcript_info(tx2gene_path) +coldata <- read_coldata(coldata_path) + +# Create initial SummarizedExperiment object +se <- create_summarized_experiment(txi[["counts"]], txi[["abundance"]], txi[["length"]], + DataFrame(coldata), transcript_info$transcript) + +# Setting parameters for writing tables +params <- list( + list(obj = se, slot = "abundance", suffix = "transcript_tpm.tsv"), + list(obj = se, slot = "counts", suffix = "transcript_counts.tsv"), + list(obj = se, slot = "length", suffix = "transcript_lengths.tsv") +) + +# Process gene-level data if tx2gene mapping is available +if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) { + tx2gene <- transcript_info$tx2gene + gi <- summarizeToGene(txi, tx2gene = tx2gene) + gi.ls <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM") + gi.s <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "scaledTPM") + + gene_info <- transcript_info$gene[match(rownames(gi[[1]]), transcript_info$gene[["gene_id"]]),] + rownames(gene_info) <- gene_info[["tx"]] + + col_data_frame <- DataFrame(coldata) + + # Create gene-level SummarizedExperiment objects + gse <- create_summarized_experiment(gi[["counts"]], gi[["abundance"]], gi[["length"]], + col_data_frame, gene_info) + gse.ls <- create_summarized_experiment(gi.ls[["counts"]], gi.ls[["abundance"]], gi.ls[["length"]], + col_data_frame, gene_info) + gse.s <- create_summarized_experiment(gi.s[["counts"]], gi.s[["abundance"]], gi.s[["length"]], + col_data_frame, gene_info) + + params <- c(params, list( + list(obj = gse, slot = "length", suffix = "gene_lengths.tsv"), + list(obj = gse, slot = "abundance", suffix = "gene_tpm.tsv"), + list(obj = gse, slot = "counts", suffix = "gene_counts.tsv"), + list(obj = gse.ls, slot = "abundance", suffix = "gene_tpm_length_scaled.tsv"), + list(obj = gse.ls, slot = "counts", suffix = "gene_counts_length_scaled.tsv"), + list(obj = gse.s, slot = "abundance", suffix = "gene_tpm_scaled.tsv"), + list(obj = gse.s, slot = "counts", suffix = "gene_counts_scaled.tsv") + )) +} + +# Writing tables for each set of parameters +done <- lapply(params, write_se_table) + +# Output session information and citations +# Removed for now because the 'tximeta' package is not found sometimes +# citation("tximeta") +sessionInfo() \ No newline at end of file diff --git a/target/nextflow/ucsc/bedclip/.config.vsh.yaml b/target/nextflow/ucsc/bedclip/.config.vsh.yaml new file mode 100644 index 0000000..29e8fe9 --- /dev/null +++ b/target/nextflow/ucsc/bedclip/.config.vsh.yaml @@ -0,0 +1,204 @@ +name: "bedclip" +namespace: "ucsc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input_bedgraph" + description: "bedGraph file which should be converted" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sizes" + description: "File with chromosome sizes" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_bedgraph" + description: "bedGraph file after clipping" + info: null + default: + - "$id.$key.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Remove lines from bed file that refer to off-chromosome locations" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.bedgraph" +- type: "file" + path: "genome.sizes" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/ucsc/bedclip/main.nf" + - "modules/nf-core/ucsc/bedclip/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "rsync" + - "libcurl4" + interactive: false + - type: "docker" + run: + - "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip\ + \ /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/ucsc/bedclip/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/ucsc/bedclip" + executable: "target/nextflow/ucsc/bedclip/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/ucsc/bedclip/main.nf b/target/nextflow/ucsc/bedclip/main.nf new file mode 100644 index 0000000..ecd3a0f --- /dev/null +++ b/target/nextflow/ucsc/bedclip/main.nf @@ -0,0 +1,3841 @@ +// bedclip v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "bedclip", + "namespace" : "ucsc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input_bedgraph", + "description" : "bedGraph file which should be converted", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sizes", + "description" : "File with chromosome sizes", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_bedgraph", + "description" : "bedGraph file after clipping", + "default" : [ + "$id.$key.bedgraph" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Remove lines from bed file that refer to off-chromosome locations", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.bedgraph" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/genome.sizes" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/ucsc/bedclip/main.nf", + "modules/nf-core/ucsc/bedclip/meta.yml" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "rsync", + "libcurl4" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip /usr/local/bin/\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/ucsc/bedclip/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/ucsc/bedclip", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT_BEDGRAPH+x} ]; then echo "${VIASH_PAR_INPUT_BEDGRAPH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_bedgraph='&'#" ; else echo "# par_input_bedgraph="; fi ) +$( if [ ! -z ${VIASH_PAR_SIZES+x} ]; then echo "${VIASH_PAR_SIZES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sizes='&'#" ; else echo "# par_sizes="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_BEDGRAPH+x} ]; then echo "${VIASH_PAR_OUTPUT_BEDGRAPH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_bedgraph='&'#" ; else echo "# par_output_bedgraph="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +bedClip \\$par_input_bedgraph \\$par_sizes \\$par_output_bedgraph +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/ucsc/bedclip", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/ucsc/bedclip/nextflow.config b/target/nextflow/ucsc/bedclip/nextflow.config new file mode 100644 index 0000000..2e8bf11 --- /dev/null +++ b/target/nextflow/ucsc/bedclip/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'ucsc/bedclip' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Remove lines from bed file that refer to off-chromosome locations' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/ucsc/bedclip/nextflow_labels.config b/target/nextflow/ucsc/bedclip/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/ucsc/bedclip/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/ucsc/bedclip/nextflow_schema.json b/target/nextflow/ucsc/bedclip/nextflow_schema.json new file mode 100644 index 0000000..61efb2a --- /dev/null +++ b/target/nextflow/ucsc/bedclip/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bedclip", +"description": "Remove lines from bed file that refer to off-chromosome locations", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input_bedgraph": { + "type": + "string", + "description": "Type: `file`. bedGraph file which should be converted", + "help_text": "Type: `file`. bedGraph file which should be converted" + + } + + + , + "sizes": { + "type": + "string", + "description": "Type: `file`. File with chromosome sizes", + "help_text": "Type: `file`. File with chromosome sizes" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_bedgraph": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bedgraph`. bedGraph file after clipping", + "help_text": "Type: `file`, default: `$id.$key.bedgraph`. bedGraph file after clipping" + , + "default":"$id.$key.bedgraph" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml b/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml new file mode 100644 index 0000000..85f648e --- /dev/null +++ b/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml @@ -0,0 +1,204 @@ +name: "bedgraphtobigwig" +namespace: "ucsc" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--bedgraph" + description: "bedGraph file which should be converted" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sizes" + description: "File with chromosome sizes" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--bigwig" + description: "bigWig coverage file relative to genes on the input file" + info: null + default: + - "$id.$key.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Convert a bedGraph file to bigWig format" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test.bedgraph" +- type: "file" + path: "genome.sizes" +info: + migration_info: + git_repo: "https://github.com/nf-core/rnaseq.git" + paths: + - "modules/nf-core/ucsc/bedgraphtobigwig/main.nf" + - "modules/nf-core/ucsc/bedgraphtobigwig/meta.yml" + last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "rsync" + - "libcurl4" + interactive: false + - type: "docker" + run: + - "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig\ + \ /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/ucsc/bedgraphtobigwig/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/ucsc/bedgraphtobigwig" + executable: "target/nextflow/ucsc/bedgraphtobigwig/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/ucsc/bedgraphtobigwig/main.nf b/target/nextflow/ucsc/bedgraphtobigwig/main.nf new file mode 100644 index 0000000..343da51 --- /dev/null +++ b/target/nextflow/ucsc/bedgraphtobigwig/main.nf @@ -0,0 +1,3841 @@ +// bedgraphtobigwig v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "bedgraphtobigwig", + "namespace" : "ucsc", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--bedgraph", + "description" : "bedGraph file which should be converted", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sizes", + "description" : "File with chromosome sizes", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--bigwig", + "description" : "bigWig coverage file relative to genes on the input file", + "default" : [ + "$id.$key.bigwig" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Convert a bedGraph file to bigWig format", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/test.bedgraph" + }, + { + "type" : "file", + "path" : "/testData/unit_test_resources/sarscov2/genome.sizes" + } + ], + "info" : { + "migration_info" : { + "git_repo" : "https://github.com/nf-core/rnaseq.git", + "paths" : [ + "modules/nf-core/ucsc/bedgraphtobigwig/main.nf", + "modules/nf-core/ucsc/bedgraphtobigwig/meta.yml" + ], + "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" + } + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "rsync", + "libcurl4" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/ucsc/bedgraphtobigwig/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/ucsc/bedgraphtobigwig", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BEDGRAPH+x} ]; then echo "${VIASH_PAR_BEDGRAPH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bedgraph='&'#" ; else echo "# par_bedgraph="; fi ) +$( if [ ! -z ${VIASH_PAR_SIZES+x} ]; then echo "${VIASH_PAR_SIZES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sizes='&'#" ; else echo "# par_sizes="; fi ) +$( if [ ! -z ${VIASH_PAR_BIGWIG+x} ]; then echo "${VIASH_PAR_BIGWIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bigwig='&'#" ; else echo "# par_bigwig="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +bedGraphToBigWig \\$par_bedgraph \\$par_sizes \\$par_bigwig +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/ucsc/bedgraphtobigwig", + "tag" : "v0.3.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/ucsc/bedgraphtobigwig/nextflow.config b/target/nextflow/ucsc/bedgraphtobigwig/nextflow.config new file mode 100644 index 0000000..f3ebda2 --- /dev/null +++ b/target/nextflow/ucsc/bedgraphtobigwig/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'ucsc/bedgraphtobigwig' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'Convert a bedGraph file to bigWig format' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/ucsc/bedgraphtobigwig/nextflow_labels.config b/target/nextflow/ucsc/bedgraphtobigwig/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/ucsc/bedgraphtobigwig/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json b/target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json new file mode 100644 index 0000000..1610798 --- /dev/null +++ b/target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bedgraphtobigwig", +"description": "Convert a bedGraph file to bigWig format", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "bedgraph": { + "type": + "string", + "description": "Type: `file`. bedGraph file which should be converted", + "help_text": "Type: `file`. bedGraph file which should be converted" + + } + + + , + "sizes": { + "type": + "string", + "description": "Type: `file`. File with chromosome sizes", + "help_text": "Type: `file`. File with chromosome sizes" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "bigwig": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bigwig`. bigWig coverage file relative to genes on the input file", + "help_text": "Type: `file`, default: `$id.$key.bigwig`. bigWig coverage file relative to genes on the input file" + , + "default":"$id.$key.bigwig" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml b/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml new file mode 100644 index 0000000..ffc8fd7 --- /dev/null +++ b/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml @@ -0,0 +1,641 @@ +name: "genome_alignment_and_quant" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + alternatives: + - "-i" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, or\ + \ reverse" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Fasta file of the reference transcriptome." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_index" + description: "STAR index directory." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--star_ignore_sjdbgtf" + description: "When using pre-built STAR indices do not re-extract and use splice\ + \ junctions from the GTF file" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--star_sjdb_gtf_feature_exon" + description: "Feature type in GTF file to be used as exons for building transcripts" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--bam_csi_index" + description: "Create a CSI index for BAM files instead of the traditional BAI\ + \ index. This will be required for genomes with larger chromosome sizes." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--umi_dedup_stats" + description: "Generate output stats when running \"umi_tools dedup\"." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--salmon_quant_libtype" + description: "Override Salmon library type inferred based on strandedness defined\ + \ in meta object." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_salmon_quant_args" + description: "Extra arguments to pass to salmon quant command in addition to defaults\ + \ defined by the pipeline." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + description: "Define the attribute type used to group features in the GTF file\ + \ when running Salmon." + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + description: "By default, the pipeline uses the gene_name field to obtain additional\ + \ gene identifiers from the input GTF file when running Salmon." + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_rsem_calculate_expression_args" + description: "Extra arguments to pass to rsem-calculate-expression command in\ + \ addition to defaults defined by the pipeline." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--aligner" + description: "Specifies the alignment algorithm to use - available options are\ + \ 'star_salmon', 'star_rsem' and 'hisat2'." + info: null + default: + - "star_salmon" + required: false + choices: + - "star_salmon" + - "star_rsem" + - "hisat2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index" + description: "Path to directory for pre-built RSEM index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory for pre-built Salmon index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--star_multiqc" + info: null + default: + - "$id_star.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_sorted" + info: null + default: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + info: null + default: + - "$id.genome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + default: + - "$id.genome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + default: + - "$id.genome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + default: + - "$id.genome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam" + info: null + default: + - "$id.transcriptome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_index" + info: null + default: + - "$id.transcriptome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_stats" + info: null + default: + - "$id.transcriptome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_flagstat" + info: null + default: + - "$id.transcriptome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_idxstats" + info: null + default: + - "$id.transcriptome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_out_dir" + info: null + default: + - "$id.salmon_quant" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_results_file" + info: null + default: + - "$id.quant.sf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_gene" + description: "Expression counts on gene level" + info: null + default: + - "$id.genes.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcripts" + description: "Expression counts on transcript level" + info: null + default: + - "$id.isoforms.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_multiqc" + description: "RSEM statistics" + info: null + default: + - "$id.stat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_star_rsem" + description: "BAM file generated by STAR (optional)" + info: null + default: + - "$id.STAR.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_genome_rsem" + description: "Genome BAM file (optional)" + info: null + default: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_transcript_rsem" + description: "Transcript BAM file (optional)" + info: null + default: + - "$id.transcript.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash sub-workflow for genome alignment and quantification stage of\ + \ nf-core/rnaseq pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "star/star_align_reads" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_sort" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_stats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_flagstat" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_idxstats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "umi_tools/umi_tools_dedup" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "umi_tools/umi_tools_prepareforrsem" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "salmon/salmon_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rsem/rsem_calculate_expression" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/genome_alignment_and_quant/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/genome_alignment_and_quant" + executable: "target/nextflow/workflows/genome_alignment_and_quant/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/genome_alignment_and_quant/main.nf b/target/nextflow/workflows/genome_alignment_and_quant/main.nf new file mode 100644 index 0000000..c0afed9 --- /dev/null +++ b/target/nextflow/workflows/genome_alignment_and_quant/main.nf @@ -0,0 +1,4378 @@ +// genome_alignment_and_quant v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "genome_alignment_and_quant", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "ID of the sample.", + "example" : [ + "foo" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_1", + "alternatives" : [ + "-i" + ], + "description" : "Path to the sample (or read 1 of paired end sample).", + "example" : [ + "input.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_2", + "description" : "Path to read 2 of the sample.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity. Must be one of unstranded, forward, or reverse", + "required" : false, + "choices" : [ + "forward", + "reverse", + "unstranded" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "GTF file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcript_fasta", + "description" : "Fasta file of the reference transcriptome.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_index", + "description" : "STAR index directory.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--star_ignore_sjdbgtf", + "description" : "When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--star_sjdb_gtf_feature_exon", + "description" : "Feature type in GTF file to be used as exons for building transcripts", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--bam_csi_index", + "description" : "Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--umi_dedup_stats", + "description" : "Generate output stats when running \\"umi_tools dedup\\".", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--with_umi", + "description" : "Enable UMI-based read deduplication.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--salmon_quant_libtype", + "description" : "Override Salmon library type inferred based on strandedness defined in meta object.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_salmon_quant_args", + "description" : "Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_group_features", + "description" : "Define the attribute type used to group features in the GTF file when running Salmon.", + "default" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_extra_attributes", + "description" : "By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon.", + "default" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_rsem_calculate_expression_args", + "description" : "Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--aligner", + "description" : "Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.", + "default" : [ + "star_salmon" + ], + "required" : false, + "choices" : [ + "star_salmon", + "star_rsem", + "hisat2" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_index", + "description" : "Path to directory for pre-built RSEM index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_index", + "description" : "Path to directory for pre-built Salmon index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--star_multiqc", + "default" : [ + "$id_star.log" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_sorted", + "default" : [ + "$id.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_index", + "default" : [ + "$id.genome.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_stats", + "default" : [ + "$id.genome.stats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_flagstat", + "default" : [ + "$id.genome.flagstat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_idxstats", + "default" : [ + "$id.genome.idxstats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam", + "default" : [ + "$id.transcriptome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_index", + "default" : [ + "$id.transcriptome.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_stats", + "default" : [ + "$id.transcriptome.stats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_flagstat", + "default" : [ + "$id.transcriptome.flagstat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_idxstats", + "default" : [ + "$id.transcriptome.idxstats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_out_dir", + "default" : [ + "$id.salmon_quant" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_results_file", + "default" : [ + "$id.quant.sf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_counts_gene", + "description" : "Expression counts on gene level", + "default" : [ + "$id.genes.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcripts", + "description" : "Expression counts on transcript level", + "default" : [ + "$id.isoforms.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_multiqc", + "description" : "RSEM statistics", + "default" : [ + "$id.stat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_star_rsem", + "description" : "BAM file generated by STAR (optional)", + "default" : [ + "$id.STAR.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_genome_rsem", + "description" : "Genome BAM file (optional)", + "default" : [ + "$id.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_transcript_rsem", + "description" : "Transcript BAM file (optional)", + "default" : [ + "$id.transcript.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "star/star_align_reads", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_sort", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_index", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_stats", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_flagstat", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_idxstats", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "umi_tools/umi_tools_dedup", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "umi_tools/umi_tools_prepareforrsem", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "salmon/salmon_quant", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "rsem/rsem_calculate_expression", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/genome_alignment_and_quant/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/genome_alignment_and_quant", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { star_align_reads } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_align_reads/main.nf" +include { samtools_sort } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf" +include { samtools_index } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf" +include { samtools_stats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf" +include { samtools_flagstat } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf" +include { samtools_idxstats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf" +include { umi_tools_dedup } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_dedup/main.nf" +include { umi_tools_prepareforrsem } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" +include { salmon_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf" +include { rsem_calculate_expression } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_calculate_expression/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired, input: input ] ] + } + + | star_align_reads.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "fastq_1", + "input_r2": "fastq_2", + "genome_dir": "star_index", + "sjdb_gtf_file": "gtf", + "out_sam_attr_rg_line": "star_sam_attr_rg_line", + "sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ + "genome_bam": "aligned_reads", + "transcriptome_bam": "reads_aligned_to_transcriptome", + "star_multiqc": "log" + ], + args: [ + quant_mode: "TranscriptomeSAM", + twopass_mode: "Basic", + out_sam_type: "BAM;Unsorted", + run_rng_seed: 0, + out_filter_multimap_nmax: 20, + align_sjdb_overhang_min: 1, + out_sam_attributes: "NH;HI;AS;NM;MD", + quant_transcriptome_sam_output: "BanSingleEnd" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // GENOME BAM + | samtools_sort.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: ["input": "genome_bam"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // + // Remove duplicate reads from BAM file based on UMIs + // + + // Deduplicate genome BAM file + | umi_tools_dedup.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.genome_bam, + bai: state.genome_bam_index, + output_stats: output_stats] + }, + toState: [ "genome_bam_sorted": "output" ], + key: "genome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_deduped_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_deduped_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta", + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_deduped_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Deduplicate transcriptome BAM file + + | samtools_sort.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index", + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + | umi_tools_dedup.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.transcriptome_bam, + bai: state.transcriptome_bam_index, + output_stats: output_stats] + }, + toState: [ "transcriptome_bam_deduped": "output" ], + key: "transcriptome_deduped", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_sort.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam_deduped" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_deduped_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_deduped_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_deduped_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_deduped_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_deduped_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Fix paired-end reads in name sorted BAM file + | umi_tools_prepareforrsem.run( + runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + // Count reads from BAM alignments using Salmon + | salmon_quant.run( + runIf: { id, state -> state.aligner == 'star_salmon' }, + fromState: [ + "lib_type": "lib_type", + "alignments": "transcriptome_bam", + "targets": "transcript_fasta", + "gene_map": "gtf" + ], + toState: [ + "quant_out_dir": "output", + "quant_results_file": "quant_results" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | rsem_calculate_expression.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "id": "id", + "strandedness": "strandedness", + "paired": "paired", + "input": "input", + "index": "rsem_index", + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts", + "stat": "rsem_multiqc", + "logs": "star_multiqc", + "bam_star": "bam_star_rsem", + "bam_genome": "bam_genome_rsem", + "bam_transcript": "bam_transcript_rsem" + ], + args: [ + star: true, + star_output_genome_bam: true, + star_gzipped_read_file: true, + estimate_rspd: true, + seed: 1 + ], + toState: [ + "rsem_counts_gene": "counts_gene", + "rsem_counts_transcripts": "counts_transcripts", + "rsem_multiqc": "stat", + "star_multiqc": "logs", + "bam_star_rsem": "bam_star", + "bam_genome_rsem": "bam_genome", + "bam_transcript_rsem": "bam_transcript" + ], + directives: [ label: [ "highmem", "midcpu" ] ] + ) + + // RSEM_Star BAM + | samtools_sort.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: ["input": "bam_star_rsem"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_index.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_stats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_flagstat.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats", + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ "star_multiqc": "star_multiqc", + "rsem_multiqc": "rsem_multiqc", + "salmon_multiqc": "salmon_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "quant_out_dir": "quant_out_dir", + "quant_results_file": "quant_results_file", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "bam_genome_rsem": "bam_genome_rsem", + "bam_transcript_rsem": "bam_transcript_rsem" + ] + ) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/genome_alignment_and_quant/nextflow.config b/target/nextflow/workflows/genome_alignment_and_quant/nextflow.config new file mode 100644 index 0000000..7cafa2b --- /dev/null +++ b/target/nextflow/workflows/genome_alignment_and_quant/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/genome_alignment_and_quant' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/genome_alignment_and_quant/nextflow_labels.config b/target/nextflow/workflows/genome_alignment_and_quant/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/genome_alignment_and_quant/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json b/target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json new file mode 100644 index 0000000..7c5a26c --- /dev/null +++ b/target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json @@ -0,0 +1,506 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "genome_alignment_and_quant", +"description": "A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`, required, example: `foo`. ID of the sample", + "help_text": "Type: `string`, required, example: `foo`. ID of the sample." + + } + + + , + "fastq_1": { + "type": + "string", + "description": "Type: `file`, required, example: `input.fastq.gz`. Path to the sample (or read 1 of paired end sample)", + "help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the sample (or read 1 of paired end sample)." + + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: `file`. Path to read 2 of the sample", + "help_text": "Type: `file`. Path to read 2 of the sample." + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity", + "help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity. Must be one of unstranded, forward, or reverse", + "enum": ["forward", "reverse", "unstranded"] + + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. GTF file", + "help_text": "Type: `file`. GTF file" + + } + + + , + "transcript_fasta": { + "type": + "string", + "description": "Type: `file`. Fasta file of the reference transcriptome", + "help_text": "Type: `file`. Fasta file of the reference transcriptome." + + } + + + , + "star_index": { + "type": + "string", + "description": "Type: `file`. STAR index directory", + "help_text": "Type: `file`. STAR index directory." + + } + + + , + "star_ignore_sjdbgtf": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file", + "help_text": "Type: `boolean`, default: `false`. When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file" + , + "default":false + } + + + , + "star_sjdb_gtf_feature_exon": { + "type": + "string", + "description": "Type: `string`. Feature type in GTF file to be used as exons for building transcripts", + "help_text": "Type: `string`. Feature type in GTF file to be used as exons for building transcripts" + + } + + + , + "bam_csi_index": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI index", + "help_text": "Type: `boolean`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes." + , + "default":false + } + + + , + "umi_dedup_stats": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Generate output stats when running \"umi_tools dedup\"", + "help_text": "Type: `boolean`, default: `false`. Generate output stats when running \"umi_tools dedup\"." + , + "default":false + } + + + , + "with_umi": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Enable UMI-based read deduplication", + "help_text": "Type: `boolean`, default: `false`. Enable UMI-based read deduplication." + , + "default":false + } + + + , + "salmon_quant_libtype": { + "type": + "string", + "description": "Type: `string`. Override Salmon library type inferred based on strandedness defined in meta object", + "help_text": "Type: `string`. Override Salmon library type inferred based on strandedness defined in meta object." + + } + + + , + "extra_salmon_quant_args": { + "type": + "string", + "description": "Type: `string`, default: ``. Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline", + "help_text": "Type: `string`, default: ``. Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline." + , + "default":"" + } + + + , + "gtf_group_features": { + "type": + "string", + "description": "Type: `string`, default: `gene_id`. Define the attribute type used to group features in the GTF file when running Salmon", + "help_text": "Type: `string`, default: `gene_id`. Define the attribute type used to group features in the GTF file when running Salmon." + , + "default":"gene_id" + } + + + , + "gtf_extra_attributes": { + "type": + "string", + "description": "Type: `string`, default: `gene_name`. By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon", + "help_text": "Type: `string`, default: `gene_name`. By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon." + , + "default":"gene_name" + } + + + , + "extra_rsem_calculate_expression_args": { + "type": + "string", + "description": "Type: `string`. Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline", + "help_text": "Type: `string`. Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline." + + } + + + , + "aligner": { + "type": + "string", + "description": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027", + "help_text": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027.", + "enum": ["star_salmon", "star_rsem", "hisat2"] + + , + "default":"star_salmon" + } + + + , + "rsem_index": { + "type": + "string", + "description": "Type: `file`. Path to directory for pre-built RSEM index", + "help_text": "Type: `file`. Path to directory for pre-built RSEM index." + + } + + + , + "salmon_index": { + "type": + "string", + "description": "Type: `file`. Path to directory for pre-built Salmon index", + "help_text": "Type: `file`. Path to directory for pre-built Salmon index." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "star_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id_star.log`. ", + "help_text": "Type: `file`, default: `$id_star.log`. " + , + "default":"$id_star.log" + } + + + , + "genome_bam_sorted": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.bam`. ", + "help_text": "Type: `file`, default: `$id.genome.bam`. " + , + "default":"$id.genome.bam" + } + + + , + "genome_bam_index": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.bam.bai`. ", + "help_text": "Type: `file`, default: `$id.genome.bam.bai`. " + , + "default":"$id.genome.bam.bai" + } + + + , + "genome_bam_stats": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.stats`. ", + "help_text": "Type: `file`, default: `$id.genome.stats`. " + , + "default":"$id.genome.stats" + } + + + , + "genome_bam_flagstat": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.flagstat`. ", + "help_text": "Type: `file`, default: `$id.genome.flagstat`. " + , + "default":"$id.genome.flagstat" + } + + + , + "genome_bam_idxstats": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.idxstats`. ", + "help_text": "Type: `file`, default: `$id.genome.idxstats`. " + , + "default":"$id.genome.idxstats" + } + + + , + "transcriptome_bam": { + "type": + "string", + "description": "Type: `file`, default: `$id.transcriptome.bam`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.bam`. " + , + "default":"$id.transcriptome.bam" + } + + + , + "transcriptome_bam_index": { + "type": + "string", + "description": "Type: `file`, default: `$id.transcriptome.bam.bai`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.bam.bai`. " + , + "default":"$id.transcriptome.bam.bai" + } + + + , + "transcriptome_bam_stats": { + "type": + "string", + "description": "Type: `file`, default: `$id.transcriptome.stats`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.stats`. " + , + "default":"$id.transcriptome.stats" + } + + + , + "transcriptome_bam_flagstat": { + "type": + "string", + "description": "Type: `file`, default: `$id.transcriptome.flagstat`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.flagstat`. " + , + "default":"$id.transcriptome.flagstat" + } + + + , + "transcriptome_bam_idxstats": { + "type": + "string", + "description": "Type: `file`, default: `$id.transcriptome.idxstats`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.idxstats`. " + , + "default":"$id.transcriptome.idxstats" + } + + + , + "quant_out_dir": { + "type": + "string", + "description": "Type: `file`, default: `$id.salmon_quant`. ", + "help_text": "Type: `file`, default: `$id.salmon_quant`. " + , + "default":"$id.salmon_quant" + } + + + , + "quant_results_file": { + "type": + "string", + "description": "Type: `file`, default: `$id.quant.sf`. ", + "help_text": "Type: `file`, default: `$id.quant.sf`. " + , + "default":"$id.quant.sf" + } + + + , + "salmon_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.salmon_multiqc`. ", + "help_text": "Type: `file`, default: `$id.$key.salmon_multiqc`. " + , + "default":"$id.$key.salmon_multiqc" + } + + + , + "rsem_counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.genes.results`. Expression counts on gene level", + "help_text": "Type: `file`, default: `$id.genes.results`. Expression counts on gene level" + , + "default":"$id.genes.results" + } + + + , + "counts_transcripts": { + "type": + "string", + "description": "Type: `file`, default: `$id.isoforms.results`. Expression counts on transcript level", + "help_text": "Type: `file`, default: `$id.isoforms.results`. Expression counts on transcript level" + , + "default":"$id.isoforms.results" + } + + + , + "rsem_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.stat`. RSEM statistics", + "help_text": "Type: `file`, default: `$id.stat`. RSEM statistics" + , + "default":"$id.stat" + } + + + , + "bam_star_rsem": { + "type": + "string", + "description": "Type: `file`, default: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)", + "help_text": "Type: `file`, default: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)" + , + "default":"$id.STAR.genome.bam" + } + + + , + "bam_genome_rsem": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.bam`. Genome BAM file (optional)", + "help_text": "Type: `file`, default: `$id.genome.bam`. Genome BAM file (optional)" + , + "default":"$id.genome.bam" + } + + + , + "bam_transcript_rsem": { + "type": + "string", + "description": "Type: `file`, default: `$id.transcript.bam`. Transcript BAM file (optional)", + "help_text": "Type: `file`, default: `$id.transcript.bam`. Transcript BAM file (optional)" + , + "default":"$id.transcript.bam" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml b/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml new file mode 100644 index 0000000..4a1067c --- /dev/null +++ b/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml @@ -0,0 +1,322 @@ +name: "merge_quant_results" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--salmon_quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--kallisto_quant_results" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "file" + name: "--gtf" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quant_type" + description: "Quantification method used." + info: null + default: + - "salmon" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--versions" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--tpm_gene" + info: null + example: + - "gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + example: + - "gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + example: + - "gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + example: + - "gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + example: + - "transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_gene" + info: null + example: + - "gene_length.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + example: + - "transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--lengths_transcript" + info: null + example: + - "transcript_length.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_merged_summarizedexperiment" + info: null + example: + - "quant_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A sub-workflow to merge the counts obtained from salmon quant across\ + \ all samples." +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "tx2gene" + repository: + type: "local" +- name: "tximport" + repository: + type: "local" +- name: "summarizedexperiment" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/merge_quant_results/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/merge_quant_results" + executable: "target/nextflow/workflows/merge_quant_results/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/tx2gene" + - "target/nextflow/tximport" + - "target/nextflow/summarizedexperiment" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/merge_quant_results/main.nf b/target/nextflow/workflows/merge_quant_results/main.nf new file mode 100644 index 0000000..7f83a33 --- /dev/null +++ b/target/nextflow/workflows/merge_quant_results/main.nf @@ -0,0 +1,3640 @@ +// merge_quant_results v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "merge_quant_results", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--salmon_quant_results", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--kallisto_quant_results", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "file", + "name" : "--gtf", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_extra_attributes", + "default" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_group_features", + "default" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quant_type", + "description" : "Quantification method used.", + "default" : [ + "salmon" + ], + "required" : false, + "choices" : [ + "salmon", + "kallisto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--versions", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--tpm_gene", + "example" : [ + "gene_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene", + "example" : [ + "gene_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_length_scaled", + "example" : [ + "gene_counts_length_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_scaled", + "example" : [ + "gene_counts_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tpm_transcript", + "example" : [ + "transcript_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--lengths_gene", + "example" : [ + "gene_length.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcript", + "example" : [ + "transcript_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--lengths_transcript", + "example" : [ + "transcript_length.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_merged_summarizedexperiment", + "example" : [ + "quant_merged_summarizedexperiment" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A sub-workflow to merge the counts obtained from salmon quant across all samples.", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "tx2gene", + "repository" : { + "type" : "local" + } + }, + { + "name" : "tximport", + "repository" : { + "type" : "local" + } + }, + { + "name" : "summarizedexperiment", + "repository" : { + "type" : "local" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/merge_quant_results/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/merge_quant_results", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { tx2gene } from "${meta.resources_dir}/../../../nextflow/tx2gene/main.nf" +include { tximport } from "${meta.resources_dir}/../../../nextflow/tximport/main.nf" +include { summarizedexperiment } from "${meta.resources_dir}/../../../nextflow/summarizedexperiment/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def quant_results = state.quant_type == 'kallisto' ? state.kallisto_quant_results : state.salmon_quant_results + [id, state + [quant_results: quant_results]] + } + + | tx2gene.run ( + fromState: [ + "quant_results": "quant_results", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf": "gtf", + "gtf_group_features": "gtf_group_features", + "quant_type": "quant_type" + ], + toState: [ "tx2gene_tsv": "tsv" ] + ) + + | tximport.run ( + fromState: [ + "quant_results": "quant_results", + "tx2gene_tsv": "tx2gene_tsv", + "quant_type": "quant_type" + ], + toState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "length_gene": "length_gene", + "length_transcript": "length_transcript" + ] + ) + + | summarizedexperiment.run ( + fromState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "tx2gene_tsv": "tx2gene_tsv" + ], + toState: [ "quant_merged_summarizedexperiment": "output" ] + ) + + | setState ( + [ "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" ] + ) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/merge_quant_results/nextflow.config b/target/nextflow/workflows/merge_quant_results/nextflow.config new file mode 100644 index 0000000..92ca1d0 --- /dev/null +++ b/target/nextflow/workflows/merge_quant_results/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/merge_quant_results' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A sub-workflow to merge the counts obtained from salmon quant across all samples.' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/merge_quant_results/nextflow_labels.config b/target/nextflow/workflows/merge_quant_results/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/merge_quant_results/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/merge_quant_results/nextflow_schema.json b/target/nextflow/workflows/merge_quant_results/nextflow_schema.json new file mode 100644 index 0000000..437b50e --- /dev/null +++ b/target/nextflow/workflows/merge_quant_results/nextflow_schema.json @@ -0,0 +1,248 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "merge_quant_results", +"description": "A sub-workflow to merge the counts obtained from salmon quant across all samples.", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "salmon_quant_results": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "kallisto_quant_results": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `file`, multiple_sep: `\",\"`. " + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "gtf_extra_attributes": { + "type": + "string", + "description": "Type: `string`, default: `gene_name`. ", + "help_text": "Type: `string`, default: `gene_name`. " + , + "default":"gene_name" + } + + + , + "gtf_group_features": { + "type": + "string", + "description": "Type: `string`, default: `gene_id`. ", + "help_text": "Type: `string`, default: `gene_id`. " + , + "default":"gene_id" + } + + + , + "quant_type": { + "type": + "string", + "description": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Quantification method used", + "help_text": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Quantification method used.", + "enum": ["salmon", "kallisto"] + + , + "default":"salmon" + } + + + , + "versions": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "tpm_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.tpm_gene.tsv`, example: `gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.tpm_gene.tsv`, example: `gene_tpm.tsv`. " + , + "default":"$id.$key.tpm_gene.tsv" + } + + + , + "counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_gene.tsv`, example: `gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.counts_gene.tsv`, example: `gene_counts.tsv`. " + , + "default":"$id.$key.counts_gene.tsv" + } + + + , + "counts_gene_length_scaled": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`, example: `gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`, example: `gene_counts_length_scaled.tsv`. " + , + "default":"$id.$key.counts_gene_length_scaled.tsv" + } + + + , + "counts_gene_scaled": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`, example: `gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`, example: `gene_counts_scaled.tsv`. " + , + "default":"$id.$key.counts_gene_scaled.tsv" + } + + + , + "tpm_transcript": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`, example: `transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`, example: `transcript_tpm.tsv`. " + , + "default":"$id.$key.tpm_transcript.tsv" + } + + + , + "lengths_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.lengths_gene.tsv`, example: `gene_length.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.lengths_gene.tsv`, example: `gene_length.tsv`. " + , + "default":"$id.$key.lengths_gene.tsv" + } + + + , + "counts_transcript": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_transcript.tsv`, example: `transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.counts_transcript.tsv`, example: `transcript_counts.tsv`. " + , + "default":"$id.$key.counts_transcript.tsv" + } + + + , + "lengths_transcript": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.lengths_transcript.tsv`, example: `transcript_length.tsv`. ", + "help_text": "Type: `file`, default: `$id.$key.lengths_transcript.tsv`, example: `transcript_length.tsv`. " + , + "default":"$id.$key.lengths_transcript.tsv" + } + + + , + "quant_merged_summarizedexperiment": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment`, example: `quant_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment`, example: `quant_merged_summarizedexperiment`. " + , + "default":"$id.$key.quant_merged_summarizedexperiment" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/post_processing/.config.vsh.yaml b/target/nextflow/workflows/post_processing/.config.vsh.yaml new file mode 100644 index 0000000..4a16ce0 --- /dev/null +++ b/target/nextflow/workflows/post_processing/.config.vsh.yaml @@ -0,0 +1,528 @@ +name: "post_processing" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\ + \ or auto" + info: null + default: + - "auto" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "Paired fastq files or not?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "Path to FASTA index" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam" + description: "Genome BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chrom_sizes" + description: "File containing chromosome lengths" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_multiqc" + description: "STAR align log file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_picard_args" + description: "Extra arguments to pass to picard MarkDuplicates command in addition\ + \ to defaults defined by the pipeline." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_stringtie_args" + description: "Extra arguments to pass to stringtie command in addition to defaults\ + \ defined by the pipeline." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--stringtie_ignore_gtf" + description: "Perform reference-guided de novo assembly of transcripts using StringTie,\ + \ i.e. don't restrict to those in GTF file." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--bam_csi_index" + description: "Create a CSI index for BAM files instead of the traditional BAI\ + \ index. This will be required for genomes with larger chromosome sizes." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_mapped_reads" + description: "Minimum percentage of uniquely mapped reads below which samples\ + \ are removed from further processing." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_qc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_markduplicates" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_stringtie" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_bigwig" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--processed_genome_bam" + info: null + default: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + info: null + default: + - "$id.genome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + default: + - "$id.genome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + default: + - "$id.genome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + default: + - "$id.genome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--markduplicates_metrics" + info: null + default: + - "$id.MarkDuplicates.metrics.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_transcript_gtf" + info: null + default: + - "$id.stringtie.transcripts.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_coverage_gtf" + info: null + default: + - "$id.stringtie.coverage.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_abundance" + info: null + default: + - "$id.stringtie.gene_abundance.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_ballgown" + info: null + default: + - "$id.stringtie.ballgown" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_forward" + info: null + default: + - "$id.forward.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_reverse" + info: null + default: + - "$id.reverse.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_forward" + info: null + default: + - "$id.forward.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_reverse" + info: null + default: + - "$id.reverse.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash sub-workflow for the post-processing stage of nf-core/rnaseq\ + \ pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "picard_markduplicates" + repository: + type: "local" +- name: "samtools/samtools_sort" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_stats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_flagstat" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "samtools/samtools_idxstats" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "stringtie" + repository: + type: "local" +- name: "bedtools/bedtools_genomecov" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "ucsc/bedclip" + repository: + type: "local" +- name: "ucsc/bedgraphtobigwig" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/post_processing/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/post_processing" + executable: "target/nextflow/workflows/post_processing/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/picard_markduplicates" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats" + - "target/nextflow/stringtie" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov" + - "target/nextflow/ucsc/bedclip" + - "target/nextflow/ucsc/bedgraphtobigwig" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/post_processing/main.nf b/target/nextflow/workflows/post_processing/main.nf new file mode 100644 index 0000000..2211712 --- /dev/null +++ b/target/nextflow/workflows/post_processing/main.nf @@ -0,0 +1,4010 @@ +// post_processing v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "post_processing", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "ID of the sample.", + "example" : [ + "foo" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity. Must be one of unstranded, forward, reverse or auto", + "default" : [ + "auto" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--paired", + "description" : "Paired fastq files or not?", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fasta", + "description" : "Path to FASTA genome file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fai", + "description" : "Path to FASTA index", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "GTF file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam", + "description" : "Genome BAM file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chrom_sizes", + "description" : "File containing chromosome lengths", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_multiqc", + "description" : "STAR align log file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_picard_args", + "description" : "Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline.", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_stringtie_args", + "description" : "Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline.", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--stringtie_ignore_gtf", + "description" : "Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--bam_csi_index", + "description" : "Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_mapped_reads", + "description" : "Minimum percentage of uniquely mapped reads below which samples are removed from further processing.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--with_umi", + "description" : "Enable UMI-based read deduplication.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_qc", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_markduplicates", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_stringtie", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_bigwig", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--processed_genome_bam", + "default" : [ + "$id.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_index", + "default" : [ + "$id.genome.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_stats", + "default" : [ + "$id.genome.stats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_flagstat", + "default" : [ + "$id.genome.flagstat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_idxstats", + "default" : [ + "$id.genome.idxstats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--markduplicates_metrics", + "default" : [ + "$id.MarkDuplicates.metrics.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_transcript_gtf", + "default" : [ + "$id.stringtie.transcripts.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_coverage_gtf", + "default" : [ + "$id.stringtie.coverage.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_abundance", + "default" : [ + "$id.stringtie.gene_abundance.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_ballgown", + "default" : [ + "$id.stringtie.ballgown" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bedgraph_forward", + "default" : [ + "$id.forward.bedgraph" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bedgraph_reverse", + "default" : [ + "$id.reverse.bedgraph" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bigwig_forward", + "default" : [ + "$id.forward.bigwig" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bigwig_reverse", + "default" : [ + "$id.reverse.bigwig" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A viash sub-workflow for the post-processing stage of nf-core/rnaseq pipeline.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "picard_markduplicates", + "repository" : { + "type" : "local" + } + }, + { + "name" : "samtools/samtools_sort", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_index", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_stats", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_flagstat", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "samtools/samtools_idxstats", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "stringtie", + "repository" : { + "type" : "local" + } + }, + { + "name" : "bedtools/bedtools_genomecov", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "ucsc/bedclip", + "repository" : { + "type" : "local" + } + }, + { + "name" : "ucsc/bedgraphtobigwig", + "repository" : { + "type" : "local" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/post_processing/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/post_processing", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { picard_markduplicates } from "${meta.resources_dir}/../../../nextflow/picard_markduplicates/main.nf" +include { samtools_sort } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_sort/main.nf" +include { samtools_index } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_index/main.nf" +include { samtools_stats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_stats/main.nf" +include { samtools_flagstat } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_flagstat/main.nf" +include { samtools_idxstats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/samtools/samtools_idxstats/main.nf" +include { stringtie } from "${meta.resources_dir}/../../../nextflow/stringtie/main.nf" +include { bedtools_genomecov } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bedtools/bedtools_genomecov/main.nf" +include { bedclip } from "${meta.resources_dir}/../../../nextflow/ucsc/bedclip/main.nf" +include { bedgraphtobigwig } from "${meta.resources_dir}/../../../nextflow/ucsc/bedgraphtobigwig/main.nf" + +// inner workflow +// user-provided Nextflow code +// Note: some helper functionality is added at the end of this file + +workflow run_wf { + take: + input_ch + + main: + + output_ch = input_ch + + | picard_markduplicates.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "genome_bam", + "fasta": "fasta", + "fai": "fai", + "extra_picard_args": "extra_picard_args" + ], + toState: [ + "processed_genome_bam": "output_bam", + "markduplicates_metrics": "metrics" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_sort.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ "input": "processed_genome_bam" ], + toState: [ "processed_genome_bam": "output" ], + key: "genome_sorted_MarkDuplicates", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_index.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "input": "processed_genome_bam", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted_MarkDuplicates", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_stats.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "input": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_stats": "output" ], + key: "MarkDuplicates_stats", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_flagstat.run ( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "MarkDuplicates_flagstat", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | samtools_idxstats.run( + runIf: { id, state -> !state.skip_markduplicates && !state.with_umi }, + fromState: [ + "bam": "processed_genome_bam", + "bai": "genome_bam_index" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "MarkDuplicates_idxstats", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | stringtie.run ( + runIf: { id, state -> !state.skip_stringtie }, + fromState: [ + "strandedness": "strandedness", + "bam": "processed_genome_bam", + "annotation_gtf": "gtf", + "extra_stringtie_args": "extra_stringtie_args" + ], + toState: [ + "stringtie_transcript_gtf": "transcript_gtf", + "stringtie_coverage_gtf": "coverage_gtf", + "stringtie_abundance": "abundance", + "stringtie_ballgown": "ballgown" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Genome-wide coverage with BEDTools + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "+" + ], + toState: [ "bedgraph_forward": "output" ], + key: "bedtools_genomecov_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "-" + ], + toState: [ "bedgraph_reverse": "output" ], + key: "bedtools_genomecov_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedclip.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bedgraph": "bedgraph_forward", + "sizes": "chrom_sizes" + ], + toState: [ "bedgraph_forward": "output_bedgraph" ], + key: "bedclip_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedgraphtobigwig.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "bedgraph": "bedgraph_forward", + "sizes": "chrom_sizes" + ], + toState: [ "bigwig_forward": "bigwig" ], + key: "bedgraphtobigwig_forward", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedclip.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bedgraph": "bedgraph_reverse", + "sizes": "chrom_sizes", + ], + toState: [ "bedgraph_reverse": "output_bedgraph" ], + key: "bedclip_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | bedgraphtobigwig.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "bedgraph": "bedgraph_reverse", + "sizes": "chrom_sizes" + ], + toState: [ "bigwig_reverse": "bigwig" ], + key: "bedgraphtobigwig_reverse", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "processed_genome_bam": "processed_genome_bam", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse" + ) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/post_processing/nextflow.config b/target/nextflow/workflows/post_processing/nextflow.config new file mode 100644 index 0000000..ef3f33d --- /dev/null +++ b/target/nextflow/workflows/post_processing/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/post_processing' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A viash sub-workflow for the post-processing stage of nf-core/rnaseq pipeline.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/post_processing/nextflow_labels.config b/target/nextflow/workflows/post_processing/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/post_processing/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/post_processing/nextflow_schema.json b/target/nextflow/workflows/post_processing/nextflow_schema.json new file mode 100644 index 0000000..1a482b2 --- /dev/null +++ b/target/nextflow/workflows/post_processing/nextflow_schema.json @@ -0,0 +1,423 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "post_processing", +"description": "A viash sub-workflow for the post-processing stage of nf-core/rnaseq pipeline.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`, required, example: `foo`. ID of the sample", + "help_text": "Type: `string`, required, example: `foo`. ID of the sample." + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, default: `auto`. Sample strand-specificity", + "help_text": "Type: `string`, default: `auto`. Sample strand-specificity. Must be one of unstranded, forward, reverse or auto" + , + "default":"auto" + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean`. Paired fastq files or not?", + "help_text": "Type: `boolean`. Paired fastq files or not?" + + } + + + , + "fasta": { + "type": + "string", + "description": "Type: `file`, required. Path to FASTA genome file", + "help_text": "Type: `file`, required. Path to FASTA genome file." + + } + + + , + "fai": { + "type": + "string", + "description": "Type: `file`, required. Path to FASTA index", + "help_text": "Type: `file`, required. Path to FASTA index" + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. GTF file", + "help_text": "Type: `file`. GTF file" + + } + + + , + "genome_bam": { + "type": + "string", + "description": "Type: `file`. Genome BAM file", + "help_text": "Type: `file`. Genome BAM file" + + } + + + , + "chrom_sizes": { + "type": + "string", + "description": "Type: `file`. File containing chromosome lengths", + "help_text": "Type: `file`. File containing chromosome lengths" + + } + + + , + "star_multiqc": { + "type": + "string", + "description": "Type: `file`. STAR align log file", + "help_text": "Type: `file`. STAR align log file." + + } + + + , + "extra_picard_args": { + "type": + "string", + "description": "Type: `string`, default: ``. Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline", + "help_text": "Type: `string`, default: ``. Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline." + , + "default":"" + } + + + , + "extra_stringtie_args": { + "type": + "string", + "description": "Type: `string`, default: ``. Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline", + "help_text": "Type: `string`, default: ``. Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline." + , + "default":"" + } + + + , + "stringtie_ignore_gtf": { + "type": + "boolean", + "description": "Type: `boolean`. Perform reference-guided de novo assembly of transcripts using StringTie, i", + "help_text": "Type: `boolean`. Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don\u0027t restrict to those in GTF file." + + } + + + , + "bam_csi_index": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI index", + "help_text": "Type: `boolean`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes." + , + "default":false + } + + + , + "min_mapped_reads": { + "type": + "integer", + "description": "Type: `integer`. Minimum percentage of uniquely mapped reads below which samples are removed from further processing", + "help_text": "Type: `integer`. Minimum percentage of uniquely mapped reads below which samples are removed from further processing." + + } + + + , + "with_umi": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Enable UMI-based read deduplication", + "help_text": "Type: `boolean`, default: `false`. Enable UMI-based read deduplication." + , + "default":false + } + + + , + "skip_qc": { + "type": + "boolean", + "description": "Type: `boolean`. ", + "help_text": "Type: `boolean`. " + + } + + + , + "skip_markduplicates": { + "type": + "boolean", + "description": "Type: `boolean`. ", + "help_text": "Type: `boolean`. " + + } + + + , + "skip_stringtie": { + "type": + "boolean", + "description": "Type: `boolean`. ", + "help_text": "Type: `boolean`. " + + } + + + , + "skip_bigwig": { + "type": + "boolean", + "description": "Type: `boolean`. ", + "help_text": "Type: `boolean`. " + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "processed_genome_bam": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.bam`. ", + "help_text": "Type: `file`, default: `$id.genome.bam`. " + , + "default":"$id.genome.bam" + } + + + , + "genome_bam_index": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.bam.bai`. ", + "help_text": "Type: `file`, default: `$id.genome.bam.bai`. " + , + "default":"$id.genome.bam.bai" + } + + + , + "genome_bam_stats": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.stats`. ", + "help_text": "Type: `file`, default: `$id.genome.stats`. " + , + "default":"$id.genome.stats" + } + + + , + "genome_bam_flagstat": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.flagstat`. ", + "help_text": "Type: `file`, default: `$id.genome.flagstat`. " + , + "default":"$id.genome.flagstat" + } + + + , + "genome_bam_idxstats": { + "type": + "string", + "description": "Type: `file`, default: `$id.genome.idxstats`. ", + "help_text": "Type: `file`, default: `$id.genome.idxstats`. " + , + "default":"$id.genome.idxstats" + } + + + , + "markduplicates_metrics": { + "type": + "string", + "description": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. ", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. " + , + "default":"$id.MarkDuplicates.metrics.txt" + } + + + , + "stringtie_transcript_gtf": { + "type": + "string", + "description": "Type: `file`, default: `$id.stringtie.transcripts.gtf`. ", + "help_text": "Type: `file`, default: `$id.stringtie.transcripts.gtf`. " + , + "default":"$id.stringtie.transcripts.gtf" + } + + + , + "stringtie_coverage_gtf": { + "type": + "string", + "description": "Type: `file`, default: `$id.stringtie.coverage.gtf`. ", + "help_text": "Type: `file`, default: `$id.stringtie.coverage.gtf`. " + , + "default":"$id.stringtie.coverage.gtf" + } + + + , + "stringtie_abundance": { + "type": + "string", + "description": "Type: `file`, default: `$id.stringtie.gene_abundance.txt`. ", + "help_text": "Type: `file`, default: `$id.stringtie.gene_abundance.txt`. " + , + "default":"$id.stringtie.gene_abundance.txt" + } + + + , + "stringtie_ballgown": { + "type": + "string", + "description": "Type: `file`, default: `$id.stringtie.ballgown`. ", + "help_text": "Type: `file`, default: `$id.stringtie.ballgown`. " + , + "default":"$id.stringtie.ballgown" + } + + + , + "bedgraph_forward": { + "type": + "string", + "description": "Type: `file`, default: `$id.forward.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.forward.bedgraph`. " + , + "default":"$id.forward.bedgraph" + } + + + , + "bedgraph_reverse": { + "type": + "string", + "description": "Type: `file`, default: `$id.reverse.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.reverse.bedgraph`. " + , + "default":"$id.reverse.bedgraph" + } + + + , + "bigwig_forward": { + "type": + "string", + "description": "Type: `file`, default: `$id.forward.bigwig`. ", + "help_text": "Type: `file`, default: `$id.forward.bigwig`. " + , + "default":"$id.forward.bigwig" + } + + + , + "bigwig_reverse": { + "type": + "string", + "description": "Type: `file`, default: `$id.reverse.bigwig`. ", + "help_text": "Type: `file`, default: `$id.reverse.bigwig`. " + , + "default":"$id.reverse.bigwig" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/pre_processing/.config.vsh.yaml b/target/nextflow/workflows/pre_processing/.config.vsh.yaml new file mode 100644 index 0000000..b6a1678 --- /dev/null +++ b/target/nextflow/workflows/pre_processing/.config.vsh.yaml @@ -0,0 +1,681 @@ +name: "pre_processing" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Inputs" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\ + \ or auto" + info: null + default: + - "auto" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index" + description: "BBsplit index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Path to FASTA transcriptome file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory containing the Salmon index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_trimmed_reads" + description: "Number of reads after trimming" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Extra pipeline options" + arguments: + - type: "boolean" + name: "--skip_qc" + description: "Skip QC steps of the workflow." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "FastQC options" + arguments: + - type: "boolean" + name: "--skip_fastqc" + description: "Skip FatQC step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "UMI-tools options" + arguments: + - type: "boolean" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_umi_extract" + description: "Skip umi_tools extract step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_extract_method" + description: "UMI pattern to use." + info: null + default: + - "string" + required: false + choices: + - "string" + - "regex" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern" + description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\ + \ first 6 nucleotides of the read are from the UMI." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern2" + description: "The UMI barcode pattern to use if the UMI is located in read 2." + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--umi_discard_read" + description: "After UMI barcode extraction discard either R1 or R2 by setting\ + \ this parameter to 1 or 2, respectively." + info: null + default: + - 0 + required: false + choices: + - 0 + - 1 + - 2 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_umi_separator" + description: "The character that separates the UMI in the read name. Most likely\ + \ a colon if you skipped the extraction with UMI-tools and used other software." + info: null + default: + - "_" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_grouping_method" + description: "Method to use to determine read groups by subsuming those with similar\ + \ UMIs. All methods start by identifying the reads with the same mapping position,\ + \ but treat similar yet nonidentical UMIs differently." + info: null + default: + - "directional" + required: false + choices: + - "unique" + - "percentile" + - "cluster" + - "adjacency" + - "directional" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--save_umi_intermeds" + description: "If this option is specified, intermediate FastQ and BAM files produced\ + \ by UMI-tools are also saved in the results directory." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read trimming options" + arguments: + - type: "string" + name: "--trimmer" + description: "Specify the trimming tool to use." + info: null + default: + - "trimgalore" + required: false + choices: + - "trimgalore" + - "fastp" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_trimmed_reads" + description: "Minimum number of trimmed reads below which samples are removed\ + \ from further processing. Some downstream steps in the pipeline will fail if\ + \ this threshold is too low." + info: null + default: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_trimming" + description: "Skip the adapter trimming step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--save_trimmed" + description: "Save the trimmed FastQ files in the results directory." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read filtering options" + arguments: + - type: "boolean_true" + name: "--skip_bbsplit" + description: "Skip BBSplit for removal of non-reference genome reads." + info: null + direction: "input" + - type: "boolean_true" + name: "--remove_ribo_rna" + description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--qc_output1" + description: "Path to output directory" + info: null + default: + - "${id}_r1.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qc_output2" + description: "Path to output directory" + info: null + default: + - "${id}_r2.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_1" + description: "FastQC HTML report for read 1." + info: null + default: + - "${id}_r1.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_2" + description: "FastQC HTML report for read 2." + info: null + default: + - "${id}_r2.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_1" + description: "FastQC report archive for read 1." + info: null + default: + - "${id}_r1.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_2" + description: "FastQC report archive for read 2." + info: null + default: + - "${id}_r2.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_1" + info: null + default: + - "${id}_r1.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_2" + info: null + default: + - "${id}_r2.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_1" + info: null + default: + - "${id}_r1.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_2" + info: null + default: + - "${id}_r2.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_1" + info: null + default: + - "${id}_r1.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_2" + info: null + default: + - "${id}_r2.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sortmerna_log" + description: "Sortmerna log file." + info: null + default: + - "$id.sortmerna.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_output" + description: "Results from Salmon quant" + info: null + default: + - "$id.salmon_quant_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_json" + description: "The fastp json format report file name" + info: null + default: + - "$id.fastp_out.json" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html" + description: "The fastp html format report file name" + info: null + default: + - "$id.fastp_out.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--merged_out" + description: "File name to store merged fastp output." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "fastqc" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "umi_tools/umi_tools_extract" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "trimgalore" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "bbmap/bbmap_bbsplit" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "sortmerna" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "fastp" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "fq_subsample" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "salmon/salmon_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/pre_processing/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/pre_processing" + executable: "target/nextflow/workflows/pre_processing/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/pre_processing/main.nf b/target/nextflow/workflows/pre_processing/main.nf new file mode 100644 index 0000000..9227d39 --- /dev/null +++ b/target/nextflow/workflows/pre_processing/main.nf @@ -0,0 +1,4291 @@ +// pre_processing v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "pre_processing", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "ID of the sample.", + "example" : [ + "foo" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_1", + "description" : "Path to the sample (or read 1 of paired end sample).", + "example" : [ + "input.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_2", + "description" : "Path to read 2 of the sample.", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity. Must be one of unstranded, forward, reverse or auto", + "default" : [ + "auto" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bbsplit_index", + "description" : "BBsplit index", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ribo_database_manifest", + "description" : "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcript_fasta", + "description" : "Path to FASTA transcriptome file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "Path to GTF annotation file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_index", + "description" : "Path to directory containing the Salmon index", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_trimmed_reads", + "description" : "Number of reads after trimming", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Extra pipeline options", + "arguments" : [ + { + "type" : "boolean", + "name" : "--skip_qc", + "description" : "Skip QC steps of the workflow.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "FastQC options", + "arguments" : [ + { + "type" : "boolean", + "name" : "--skip_fastqc", + "description" : "Skip FatQC step.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "UMI-tools options", + "arguments" : [ + { + "type" : "boolean", + "name" : "--with_umi", + "description" : "Enable UMI-based read deduplication.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_umi_extract", + "description" : "Skip umi_tools extract step.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_extract_method", + "description" : "UMI pattern to use.", + "default" : [ + "string" + ], + "required" : false, + "choices" : [ + "string", + "regex" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_bc_pattern", + "description" : "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI.", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_bc_pattern2", + "description" : "The UMI barcode pattern to use if the UMI is located in read 2.", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--umi_discard_read", + "description" : "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.", + "default" : [ + 0 + ], + "required" : false, + "choices" : [ + 0, + 1, + 2 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_umi_separator", + "description" : "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software.", + "default" : [ + "_" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_grouping_method", + "description" : "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", + "default" : [ + "directional" + ], + "required" : false, + "choices" : [ + "unique", + "percentile", + "cluster", + "adjacency", + "directional" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--save_umi_intermeds", + "description" : "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read trimming options", + "arguments" : [ + { + "type" : "string", + "name" : "--trimmer", + "description" : "Specify the trimming tool to use.", + "default" : [ + "trimgalore" + ], + "required" : false, + "choices" : [ + "trimgalore", + "fastp" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_trimmed_reads", + "description" : "Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.", + "default" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_trimming", + "description" : "Skip the adapter trimming step.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--save_trimmed", + "description" : "Save the trimmed FastQ files in the results directory.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read filtering options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--skip_bbsplit", + "description" : "Skip BBSplit for removal of non-reference genome reads.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--remove_ribo_rna", + "description" : "Enable the removal of reads derived from ribosomal RNA using SortMeRNA.", + "direction" : "input" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--qc_output1", + "description" : "Path to output directory", + "default" : [ + "${id}_r1.fastq.gz" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qc_output2", + "description" : "Path to output directory", + "default" : [ + "${id}_r2.fastq.gz" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_html_1", + "description" : "FastQC HTML report for read 1.", + "default" : [ + "${id}_r1.fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_html_2", + "description" : "FastQC HTML report for read 2.", + "default" : [ + "${id}_r2.fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_zip_1", + "description" : "FastQC report archive for read 1.", + "default" : [ + "${id}_r1.fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_zip_2", + "description" : "FastQC report archive for read 2.", + "default" : [ + "${id}_r2.fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_log_1", + "default" : [ + "${id}_r1.trimming_report.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_log_2", + "default" : [ + "${id}_r2.trimming_report.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_html_1", + "default" : [ + "${id}_r1.trimmed_fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_html_2", + "default" : [ + "${id}_r2.trimmed_fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_zip_1", + "default" : [ + "${id}_r1.trimmed_fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_zip_2", + "default" : [ + "${id}_r2.trimmed_fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sortmerna_log", + "description" : "Sortmerna log file.", + "default" : [ + "$id.sortmerna.log" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_quant_output", + "description" : "Results from Salmon quant", + "default" : [ + "$id.salmon_quant_output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_json", + "description" : "The fastp json format report file name", + "default" : [ + "$id.fastp_out.json" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_html", + "description" : "The fastp html format report file name", + "default" : [ + "$id.fastp_out.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--merged_out", + "description" : "File name to store merged fastp output.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "fastqc", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "umi_tools/umi_tools_extract", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "trimgalore", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "bbmap/bbmap_bbsplit", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "sortmerna", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "fastp", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "fq_subsample", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "salmon/salmon_quant", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/pre_processing/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/pre_processing", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { fastqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf" +include { umi_tools_extract } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/umi_tools/umi_tools_extract/main.nf" +include { trimgalore } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/trimgalore/main.nf" +include { bbmap_bbsplit } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf" +include { sortmerna } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/sortmerna/main.nf" +include { fastp } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastp/main.nf" +include { fq_subsample } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fq_subsample/main.nf" +include { salmon_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [paired: paired, input: input] ] + } + + | fastqc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_fastqc }, + fromState: [ "input": "input" ], + toState: {id, output_state, state -> + def newKeys = [ + "fastqc_html_1":output_state["html"][0], + "fastqc_html_2": output_state["html"][1], + "fastqc_zip_1": output_state["zip"][0], + "fastqc_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + // Extract UMIs from fastq files and discard read 1 or read 2 if required + | umi_tools_extract.run ( + runIf: { id, state -> state.with_umi && !state.skip_umi_extract }, + fromState: { id, state -> + def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2) + def output = "${id}.r1.fastq.gz" + def read2_out = state.paired ? "${id}.r2.fastq.gz" : state.remove(state.fastq_2) + [ input: state.fastq_1, + read2_in: state.fastq_2, + bc_pattern: state.umitools_bc_pattern, + bc_pattern2: bc_pattern2, + extract_method: state.umitools_extract_method, + umi_separator: state.umitools_umi_separator, + grouping_method: state.umitools_grouping_method, + output: output, + read2_out: read2_out ] + }, + toState: [ + "fastq_1": "output", + "fastq_2": "read2_out" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Discard read if required + | map { id, state -> + def paired = state.paired + def fastq_1 = state.fastq_1 + def fastq_2 = state.fastq_2 + if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) { + if (state.umi_discard_read == 1) { + fastq_1 = fastq_2 + } + fastq_2 = state.remove(state.fastq_2) + paired = false + } + [ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ] + } + + // Trim reads using Trim galore! + | trimgalore.run ( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "trimgalore" }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ paired: state.paired, + input: input, + min_trimmed_reads: state.min_trimmed_reads, + trimmed_r1: state.qc_output1, + trimmed_r2: state.qc_output2 ] + }, + args: [gzip: true, fastqc: true], + toState: [ + "fastq_1": "trimmed_r1", + "fastq_2": "trimmed_r2", + "trim_log_1": "trimming_report_r1", + "trim_log_2": "trimming_report_r2", + "trim_zip_1": "trimmed_fastqc_zip_1", + "trim_zip_2": "trimmed_fastqc_zip_2", + "trim_html_1": "trimmed_fastqc_html_1", + "trim_html_2": "trimmed_fastqc_html_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Trim reads using fastp + | fastp.run( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, + fromState: { id, state -> + def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + [ in1: state.fastq_1, + in2: state.fastq_2, + merge: state.fastp_save_merged, + interleaved_in: state.interleaved_reads, + detect_adapter_for_pe: state.paired, + adapter_fasta: state.fastp_adapter_fasta ] + outputState + }, + toState: [ + "fastq_1": "out1", + "fastq_2": "out2", + "failed_trim": "failed_out", + "failed_trim_unpaired1": "unpaired1", + "failed_trim_unpaired2": "unpaired2", + "trim_json": "json", + "trim_html": "html", + "trim_merged_out": "merged_out" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Perform FASTQC on reads trimmed using fastp + | fastqc.run ( + runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ input: input ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trim_html_1":output_state["html"][0], + "trim_html_2": output_state["html"][1], + "trim_zip_1": output_state["zip"][0], + "trim_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], + key: "fastqc_trimming", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Filter out contaminant RNA + | bbmap_bbsplit.run ( + runIf: { id, state -> !state.skip_bbsplit }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + [ paired: state.paired, + input: input, + build: state.bbsplit_index ] + }, + args: ["only_build_index": false], + toState: [ + "fastq_1": "fastq_1", + "fastq_2": "fastq_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Sort reads by rRNA and non-rRNA + | sortmerna.run ( + runIf: { id, state -> state.remove_ribo_rna }, + fromState: { id, state -> + def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def filePaths = state.ribo_database_manifest.readLines() + def refs = filePaths.collect { it } + def other = "${id}_non_rRNA_reads/" + [ paired_in: state.paired, + input: input, + ref: refs, + out2: state.paired, + other: other ] + }, + args: [fastx: true, num_alignments: 1], + toState: { id, output_state, state -> + def newKeys = [ + "sortmerna_output": output_state["other"], + "sortmerna_log": output_state["log"] + ] + def new_state = state + newKeys + return new_state + }, + directives: [ label: [ "highmem", "midcpu" ] ] + ) + | map { id, state -> + if (state.remove_ribo_rna) { + def fastq_1 = state.sortmerna_output.listFiles().find{it.name == "other_fwd.fq.gz"} + def fastq_2 = state.sortmerna_output.listFiles().find{it.name == "other_rev.fq.gz"} + [ id, state + [fastq_1: fastq_1, fastq_2: fastq_2] ] + } else { + [ id, state ] + } + } + + // Sub-sample FastQ files and pseudo-align with Salmon to auto-infer strandedness + | fq_subsample.run ( + runIf: { id, state -> state.strandedness == 'auto' }, + fromState: { id, state -> + def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + }, + args: [ + record_count: 1000, + seed: 1 + ], + toState: [ + "subsampled_fastq_1": "output_1", + "subsampled_fastq_2": "output_2" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + | salmon_quant.run ( + runIf: { id, state -> state.strandedness == 'auto' }, + fromState: { id, state -> + def unmated_reads = !state.paired ? state.subsampled_fastq_1 : null + def mates1 = state.paired ? state.subsampled_fastq_1 : null + def mates2 = state.paired ? state.subsampled_fastq_2 : null + [ unmated_reads: unmated_reads, + mates1: mates1, + mates2: mates2, + gene_map: state.gtf, + index: state.salmon_index, + lib_type: state.lib_type ] + }, + args: [ "skip_quant": true ], + toState: [ "salmon_quant_output": "output" ], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = (!state.paired) ? + [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : + [] + [ id, state + mod_state ] + } + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "qc_output1": "fastq_1", + "qc_output2": "fastq_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "sortmerna_log": "sortmerna_log", + "trim_json": "trim_json", + "trim_html": "trim_html", + "trim_merged_out": "trim_merged_out", + "salmon_quant_output": "salmon_quant_output" + ) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/pre_processing/nextflow.config b/target/nextflow/workflows/pre_processing/nextflow.config new file mode 100644 index 0000000..5058f20 --- /dev/null +++ b/target/nextflow/workflows/pre_processing/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/pre_processing' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/pre_processing/nextflow_labels.config b/target/nextflow/workflows/pre_processing/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/pre_processing/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/pre_processing/nextflow_schema.json b/target/nextflow/workflows/pre_processing/nextflow_schema.json new file mode 100644 index 0000000..e92a958 --- /dev/null +++ b/target/nextflow/workflows/pre_processing/nextflow_schema.json @@ -0,0 +1,626 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "pre_processing", +"description": "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`, required, example: `foo`. ID of the sample", + "help_text": "Type: `string`, required, example: `foo`. ID of the sample." + + } + + + , + "fastq_1": { + "type": + "string", + "description": "Type: `file`, required, example: `input.fastq.gz`. Path to the sample (or read 1 of paired end sample)", + "help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the sample (or read 1 of paired end sample)." + + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: `file`. Path to read 2 of the sample", + "help_text": "Type: `file`. Path to read 2 of the sample." + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, default: `auto`. Sample strand-specificity", + "help_text": "Type: `string`, default: `auto`. Sample strand-specificity. Must be one of unstranded, forward, reverse or auto" + , + "default":"auto" + } + + + , + "bbsplit_index": { + "type": + "string", + "description": "Type: `file`. BBsplit index", + "help_text": "Type: `file`. BBsplit index" + + } + + + , + "ribo_database_manifest": { + "type": + "string", + "description": "Type: `file`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA", + "help_text": "Type: `file`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA." + + } + + + , + "transcript_fasta": { + "type": + "string", + "description": "Type: `file`. Path to FASTA transcriptome file", + "help_text": "Type: `file`. Path to FASTA transcriptome file." + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. Path to GTF annotation file", + "help_text": "Type: `file`. Path to GTF annotation file." + + } + + + , + "salmon_index": { + "type": + "string", + "description": "Type: `file`. Path to directory containing the Salmon index", + "help_text": "Type: `file`. Path to directory containing the Salmon index" + + } + + + , + "num_trimmed_reads": { + "type": + "integer", + "description": "Type: `integer`. Number of reads after trimming", + "help_text": "Type: `integer`. Number of reads after trimming" + + } + + +} +}, + + + "extra pipeline options" : { + "title": "Extra pipeline options", + "type": "object", + "description": "No description", + "properties": { + + + "skip_qc": { + "type": + "boolean", + "description": "Type: `boolean`. Skip QC steps of the workflow", + "help_text": "Type: `boolean`. Skip QC steps of the workflow." + + } + + +} +}, + + + "fastqc options" : { + "title": "FastQC options", + "type": "object", + "description": "No description", + "properties": { + + + "skip_fastqc": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Skip FatQC step", + "help_text": "Type: `boolean`, default: `false`. Skip FatQC step." + , + "default":false + } + + +} +}, + + + "umi-tools options" : { + "title": "UMI-tools options", + "type": "object", + "description": "No description", + "properties": { + + + "with_umi": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Enable UMI-based read deduplication", + "help_text": "Type: `boolean`, default: `false`. Enable UMI-based read deduplication." + , + "default":false + } + + + , + "skip_umi_extract": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Skip umi_tools extract step", + "help_text": "Type: `boolean`, default: `false`. Skip umi_tools extract step." + , + "default":false + } + + + , + "umitools_extract_method": { + "type": + "string", + "description": "Type: `string`, default: `string`, choices: ``string`, `regex``. UMI pattern to use", + "help_text": "Type: `string`, default: `string`, choices: ``string`, `regex``. UMI pattern to use.", + "enum": ["string", "regex"] + + , + "default":"string" + } + + + , + "umitools_bc_pattern": { + "type": + "string", + "description": "Type: `string`, default: ``. The UMI barcode pattern to use e", + "help_text": "Type: `string`, default: ``. The UMI barcode pattern to use e.g. \u0027NNNNNN\u0027 indicates that the first 6 nucleotides of the read are from the UMI." + , + "default":"" + } + + + , + "umitools_bc_pattern2": { + "type": + "string", + "description": "Type: `string`, default: ``. The UMI barcode pattern to use if the UMI is located in read 2", + "help_text": "Type: `string`, default: ``. The UMI barcode pattern to use if the UMI is located in read 2." + , + "default":"" + } + + + , + "umi_discard_read": { + "type": + "integer", + "description": "Type: `integer`, default: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively", + "help_text": "Type: `integer`, default: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.", + "enum": [0, 1, 2] + + , + "default":0 + } + + + , + "umitools_umi_separator": { + "type": + "string", + "description": "Type: `string`, default: `_`. The character that separates the UMI in the read name", + "help_text": "Type: `string`, default: `_`. The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software." + , + "default":"_" + } + + + , + "umitools_grouping_method": { + "type": + "string", + "description": "Type: `string`, default: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs", + "help_text": "Type: `string`, default: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", + "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] + + , + "default":"directional" + } + + + , + "save_umi_intermeds": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory", + "help_text": "Type: `boolean`, default: `false`. If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." + , + "default":false + } + + +} +}, + + + "read trimming options" : { + "title": "Read trimming options", + "type": "object", + "description": "No description", + "properties": { + + + "trimmer": { + "type": + "string", + "description": "Type: `string`, default: `trimgalore`, choices: ``trimgalore`, `fastp``. Specify the trimming tool to use", + "help_text": "Type: `string`, default: `trimgalore`, choices: ``trimgalore`, `fastp``. Specify the trimming tool to use.", + "enum": ["trimgalore", "fastp"] + + , + "default":"trimgalore" + } + + + , + "min_trimmed_reads": { + "type": + "integer", + "description": "Type: `integer`, default: `10000`. Minimum number of trimmed reads below which samples are removed from further processing", + "help_text": "Type: `integer`, default: `10000`. Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low." + , + "default":10000 + } + + + , + "skip_trimming": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Skip the adapter trimming step", + "help_text": "Type: `boolean`, default: `false`. Skip the adapter trimming step." + , + "default":false + } + + + , + "save_trimmed": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Save the trimmed FastQ files in the results directory", + "help_text": "Type: `boolean`, default: `false`. Save the trimmed FastQ files in the results directory." + , + "default":false + } + + +} +}, + + + "read filtering options" : { + "title": "Read filtering options", + "type": "object", + "description": "No description", + "properties": { + + + "skip_bbsplit": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip BBSplit for removal of non-reference genome reads", + "help_text": "Type: `boolean_true`, default: `false`. Skip BBSplit for removal of non-reference genome reads." + , + "default":false + } + + + , + "remove_ribo_rna": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable the removal of reads derived from ribosomal RNA using SortMeRNA", + "help_text": "Type: `boolean_true`, default: `false`. Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + , + "default":false + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "qc_output1": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r1.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `${id}_r1.fastq.gz`. Path to output directory" + , + "default":"${id}_r1.fastq.gz" + } + + + , + "qc_output2": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r2.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `${id}_r2.fastq.gz`. Path to output directory" + , + "default":"${id}_r2.fastq.gz" + } + + + , + "fastqc_html_1": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r1.fastqc.html`. FastQC HTML report for read 1", + "help_text": "Type: `file`, default: `${id}_r1.fastqc.html`. FastQC HTML report for read 1." + , + "default":"${id}_r1.fastqc.html" + } + + + , + "fastqc_html_2": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r2.fastqc.html`. FastQC HTML report for read 2", + "help_text": "Type: `file`, default: `${id}_r2.fastqc.html`. FastQC HTML report for read 2." + , + "default":"${id}_r2.fastqc.html" + } + + + , + "fastqc_zip_1": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r1.fastqc.zip`. FastQC report archive for read 1", + "help_text": "Type: `file`, default: `${id}_r1.fastqc.zip`. FastQC report archive for read 1." + , + "default":"${id}_r1.fastqc.zip" + } + + + , + "fastqc_zip_2": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r2.fastqc.zip`. FastQC report archive for read 2", + "help_text": "Type: `file`, default: `${id}_r2.fastqc.zip`. FastQC report archive for read 2." + , + "default":"${id}_r2.fastqc.zip" + } + + + , + "trim_log_1": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r1.trimming_report.txt`. ", + "help_text": "Type: `file`, default: `${id}_r1.trimming_report.txt`. " + , + "default":"${id}_r1.trimming_report.txt" + } + + + , + "trim_log_2": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r2.trimming_report.txt`. ", + "help_text": "Type: `file`, default: `${id}_r2.trimming_report.txt`. " + , + "default":"${id}_r2.trimming_report.txt" + } + + + , + "trim_html_1": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r1.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `${id}_r1.trimmed_fastqc.html`. " + , + "default":"${id}_r1.trimmed_fastqc.html" + } + + + , + "trim_html_2": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r2.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `${id}_r2.trimmed_fastqc.html`. " + , + "default":"${id}_r2.trimmed_fastqc.html" + } + + + , + "trim_zip_1": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r1.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `${id}_r1.trimmed_fastqc.zip`. " + , + "default":"${id}_r1.trimmed_fastqc.zip" + } + + + , + "trim_zip_2": { + "type": + "string", + "description": "Type: `file`, default: `${id}_r2.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `${id}_r2.trimmed_fastqc.zip`. " + , + "default":"${id}_r2.trimmed_fastqc.zip" + } + + + , + "sortmerna_log": { + "type": + "string", + "description": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file", + "help_text": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file." + , + "default":"$id.sortmerna.log" + } + + + , + "salmon_quant_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.salmon_quant_output`. Results from Salmon quant", + "help_text": "Type: `file`, default: `$id.salmon_quant_output`. Results from Salmon quant" + , + "default":"$id.salmon_quant_output" + } + + + , + "trim_json": { + "type": + "string", + "description": "Type: `file`, default: `$id.fastp_out.json`. The fastp json format report file name", + "help_text": "Type: `file`, default: `$id.fastp_out.json`. The fastp json format report file name" + , + "default":"$id.fastp_out.json" + } + + + , + "trim_html": { + "type": + "string", + "description": "Type: `file`, default: `$id.fastp_out.html`. The fastp html format report file name", + "help_text": "Type: `file`, default: `$id.fastp_out.html`. The fastp html format report file name" + , + "default":"$id.fastp_out.html" + } + + + , + "merged_out": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.merged_out`. File name to store merged fastp output", + "help_text": "Type: `file`, default: `$id.$key.merged_out`. File name to store merged fastp output." + , + "default":"$id.$key.merged_out" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/extra pipeline options" + }, + + { + "$ref": "#/definitions/fastqc options" + }, + + { + "$ref": "#/definitions/umi-tools options" + }, + + { + "$ref": "#/definitions/read trimming options" + }, + + { + "$ref": "#/definitions/read filtering options" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/prepare_genome/.config.vsh.yaml b/target/nextflow/workflows/prepare_genome/.config.vsh.yaml new file mode 100644 index 0000000..9c81629 --- /dev/null +++ b/target/nextflow/workflows/prepare_genome/.config.vsh.yaml @@ -0,0 +1,556 @@ +name: "prepare_genome" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file. This parameter is *mandatory* if --genome\ + \ is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gff" + description: "Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--additional_fasta" + description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\ + \ sequences." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Path to FASTA transcriptome file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed" + description: "Path to BED file containing gene intervals. This will be created\ + \ from the GTF file if not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--splicesites" + description: "Splice sites file required for HISAT2." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_bbsplit" + description: "Skip BBSplit for removal of non-reference genome reads." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_fasta_list" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--star_index" + description: "Path to directory or tar.gz archive for pre-built STAR index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--star_sjdb_gtf_feature_exon" + description: "Feature type in GTF file to be used as exons for building transcripts" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index" + description: "Path to directory or tar.gz archive for pre-built RSEM index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory or tar.gz archive for pre-built Salmon index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index" + description: "Path to directory or tar.gz archive for pre-built Kallisto index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index" + description: "Path to directory or tar.gz archive for pre-built BBSplit index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--pseudo_aligner_kmer_size" + description: "Kmer length passed to indexing step of pseudoaligners." + info: null + default: + - 31 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--gencode" + description: "Specify if the GTF annotation is in GENCODE format." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--biotype" + description: "Biotype value to use while appending entries to GTF file when additional\ + \ fasta file is provided." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--filter_gtf" + description: "Whether to filter the GTF or not?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--aligner" + description: "Specifies the alignment algorithm to use - available options are\ + \ 'star_salmon', 'star_rsem' and 'hisat2'." + info: null + default: + - "star_salmon" + required: false + choices: + - "star_salmon" + - "star_rsem" + - "hisat2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Specifies the pseudo aligner to use - available options are 'salmon'.\ + \ Runs in addition to '--aligner'." + info: null + default: + - "salmon" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_alignment" + description: "Skip all of the alignment-based processes within the pipeline." + info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--fasta_uncompressed" + info: null + default: + - "reference_genome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf_uncompressed" + info: null + default: + - "gene_annotation.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta_uncompressed" + info: null + default: + - "transcriptome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed_uncompressed" + info: null + default: + - "gene_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_index_uncompressed" + description: "Path to STAR index." + info: null + default: + - "STAR_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index_uncompressed" + description: "Path to directory or tar.gz archive for pre-built RSEM index." + info: null + default: + - "RSEM_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index_uncompressed" + description: "Path to Salmon index." + info: null + default: + - "Salmon_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index_uncompressed" + description: "Path to Kallisto index." + info: null + default: + - "Kallisto_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index_uncompressed" + description: "Path to BBSplit index." + info: null + default: + - "BBSplit_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chrom_sizes" + description: "File containing chromosome lengths" + info: null + default: + - "reference_genome.fasta.sizes" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fai" + description: "FASTA index file" + info: null + default: + - "reference_genome.fasta.fai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A subworkflow for preparing all the required genome references\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "gunzip" + repository: + type: "local" +- name: "gffread" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "cat_additional_fasta" + repository: + type: "local" +- name: "gtf2bed" + repository: + type: "local" +- name: "preprocess_transcripts_fasta" + repository: + type: "local" +- name: "gtf_filter" + repository: + type: "local" +- name: "rsem/rsem_prepare_reference" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "getchromsizes" + repository: + type: "local" +- name: "untar" + repository: + type: "vsh" + repo: "craftbox" + tag: "v0.1.0" +- name: "star/star_genome_generate" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "bbmap/bbmap_bbsplit" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "salmon/salmon_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "kallisto/kallisto_index" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/prepare_genome/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/prepare_genome" + executable: "target/nextflow/workflows/prepare_genome/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/gunzip" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread" + - "target/nextflow/cat_additional_fasta" + - "target/nextflow/gtf2bed" + - "target/nextflow/preprocess_transcripts_fasta" + - "target/nextflow/gtf_filter" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference" + - "target/nextflow/getchromsizes" + - "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/prepare_genome/main.nf b/target/nextflow/workflows/prepare_genome/main.nf new file mode 100644 index 0000000..5f6c195 --- /dev/null +++ b/target/nextflow/workflows/prepare_genome/main.nf @@ -0,0 +1,4162 @@ +// prepare_genome v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "prepare_genome", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--fasta", + "description" : "Path to FASTA genome file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gff", + "description" : "Path to GFF3 annotation file. Required if \\"--gtf\\" is not specified.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--additional_fasta", + "description" : "FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcript_fasta", + "description" : "Path to FASTA transcriptome file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gene_bed", + "description" : "Path to BED file containing gene intervals. This will be created from the GTF file if not specified.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--splicesites", + "description" : "Splice sites file required for HISAT2.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_bbsplit", + "description" : "Skip BBSplit for removal of non-reference genome reads.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bbsplit_fasta_list", + "description" : "List of reference genomes (separated by \\";\\") to filter reads against with BBSplit.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_index", + "description" : "Path to directory or tar.gz archive for pre-built STAR index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--star_sjdb_gtf_feature_exon", + "description" : "Feature type in GTF file to be used as exons for building transcripts", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_index", + "description" : "Path to directory or tar.gz archive for pre-built RSEM index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_index", + "description" : "Path to directory or tar.gz archive for pre-built Salmon index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--kallisto_index", + "description" : "Path to directory or tar.gz archive for pre-built Kallisto index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bbsplit_index", + "description" : "Path to directory or tar.gz archive for pre-built BBSplit index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--pseudo_aligner_kmer_size", + "description" : "Kmer length passed to indexing step of pseudoaligners.", + "default" : [ + 31 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--gencode", + "description" : "Specify if the GTF annotation is in GENCODE format.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--biotype", + "description" : "Biotype value to use while appending entries to GTF file when additional fasta file is provided.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--filter_gtf", + "description" : "Whether to filter the GTF or not?", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--aligner", + "description" : "Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.", + "default" : [ + "star_salmon" + ], + "required" : false, + "choices" : [ + "star_salmon", + "star_rsem", + "hisat2" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--pseudo_aligner", + "description" : "Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'.", + "default" : [ + "salmon" + ], + "required" : false, + "choices" : [ + "salmon", + "kallisto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--skip_alignment", + "description" : "Skip all of the alignment-based processes within the pipeline.", + "direction" : "input" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--fasta_uncompressed", + "default" : [ + "reference_genome.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf_uncompressed", + "default" : [ + "gene_annotation.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcript_fasta_uncompressed", + "default" : [ + "transcriptome.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gene_bed_uncompressed", + "default" : [ + "gene_annotation.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_index_uncompressed", + "description" : "Path to STAR index.", + "default" : [ + "STAR_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_index_uncompressed", + "description" : "Path to directory or tar.gz archive for pre-built RSEM index.", + "default" : [ + "RSEM_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_index_uncompressed", + "description" : "Path to Salmon index.", + "default" : [ + "Salmon_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--kallisto_index_uncompressed", + "description" : "Path to Kallisto index.", + "default" : [ + "Kallisto_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bbsplit_index_uncompressed", + "description" : "Path to BBSplit index.", + "default" : [ + "BBSplit_index" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chrom_sizes", + "description" : "File containing chromosome lengths", + "default" : [ + "reference_genome.fasta.sizes" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fai", + "description" : "FASTA index file", + "default" : [ + "reference_genome.fasta.fai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A subworkflow for preparing all the required genome references\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "gunzip", + "repository" : { + "type" : "local" + } + }, + { + "name" : "gffread", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "cat_additional_fasta", + "repository" : { + "type" : "local" + } + }, + { + "name" : "gtf2bed", + "repository" : { + "type" : "local" + } + }, + { + "name" : "preprocess_transcripts_fasta", + "repository" : { + "type" : "local" + } + }, + { + "name" : "gtf_filter", + "repository" : { + "type" : "local" + } + }, + { + "name" : "rsem/rsem_prepare_reference", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "getchromsizes", + "repository" : { + "type" : "local" + } + }, + { + "name" : "untar", + "repository" : { + "type" : "vsh", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + }, + { + "name" : "star/star_genome_generate", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "bbmap/bbmap_bbsplit", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "salmon/salmon_index", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "kallisto/kallisto_index", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/prepare_genome/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/prepare_genome", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { gunzip } from "${meta.resources_dir}/../../../nextflow/gunzip/main.nf" +include { gffread } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/gffread/main.nf" +include { cat_additional_fasta } from "${meta.resources_dir}/../../../nextflow/cat_additional_fasta/main.nf" +include { gtf2bed } from "${meta.resources_dir}/../../../nextflow/gtf2bed/main.nf" +include { preprocess_transcripts_fasta } from "${meta.resources_dir}/../../../nextflow/preprocess_transcripts_fasta/main.nf" +include { gtf_filter } from "${meta.resources_dir}/../../../nextflow/gtf_filter/main.nf" +include { rsem_prepare_reference } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rsem/rsem_prepare_reference/main.nf" +include { getchromsizes } from "${meta.resources_dir}/../../../nextflow/getchromsizes/main.nf" +include { untar } from "${meta.root_dir}/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf" +include { star_genome_generate } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/star/star_genome_generate/main.nf" +include { bbmap_bbsplit } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bbmap/bbmap_bbsplit/main.nf" +include { salmon_index } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_index/main.nf" +include { kallisto_index } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_index/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + + take: + input_ch + + main: + output_ch = input_ch + + // Uncompress fasta + | gunzip.run ( + fromState: [ "input": "fasta" ], + toState: [ "fasta": "output" ], + key: "gunzip_fasta", + args: [ output: "reference_genome.fasta" ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress gtf + | gunzip.run ( + runIf: {id, state -> state.gtf}, + fromState: [ "input": "gtf" ], + toState: [ "gtf": "output" ], + key: "gunzip_gtf", + args: [output: "gene_annotation.gtf"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress gff + | gunzip.run ( + runIf: {id, state -> !state.gtf && state.gff}, + fromState: [ "input": "gff" ], + toState: [ "gff": "output" ], + key: "gunzip_gff", + args: [output: "gene_annotation.gff"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // gff to gtf + | gffread.run ( + runIf: {id, state -> !state.gtf && state.gff}, + fromState: [ + "input": "gff", + "genome": "fasta" + ], + toState: [ "gtf": "outfile" ], + args: [ + outfile: "gene_annotation.gtf", + gtf_output: true, + keep_attrs: true, + keep_exon_attrs: true + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | gtf_filter.run( + runIf: {id, state -> state.gtf && state.filter_gtf}, + fromState: [ + "fasta": "fasta", + "gtf": "gtf" + ], + toState: [ "gtf": "filtered_gtf" ], + args: [filtered_gtf: "gene_annotation.gtf"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress additional fasta + | gunzip.run ( + runIf: {id, state -> state.additional_fasta}, + fromState: [ "input": "additional_fasta" ], + toState: [ "additional_fasta": "output" ], + key: "gunzip_additional_fasta", + args: [output: "additional.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // concatenate additional fasta + | cat_additional_fasta.run ( + runIf: {id, state -> state.additional_fasta}, + fromState: [ + "fasta": "fasta", + "gtf": "gtf", + "additional_fasta": "additional_fasta", + "biotype": "biotype" + ], + toState: [ + "fasta": "fasta_output", + "gtf": "gtf_output" + ], + args: [ + fasta_output: "genome_additional.fasta", + gtf_output: "genome_additional.gtf" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress bed file + | gunzip.run ( + runIf: {id, state -> state.gene_bed}, + fromState: [ "input": "gene_bed" ], + toState: [ "gene_bed": "output" ], + key: "gunzip_gene_bed", + args: [output: "genome_additional.bed"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // gtf to bed + | gtf2bed.run ( + runIf: { id, state -> !state.gene_bed}, + fromState: [ "gtf": "gtf" ], + toState: [ "gene_bed": "bed_output" ], + args: [bed_output: "genome_additional.bed"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // uncompress transcript fasta + | gunzip.run ( + runIf: {id, state -> state.transcript_fasta}, + fromState: [ "input": "transcript_fasta" ], + toState: [ "transcript_fasta": "output" ], + key: "transcript_fasta", + args: [output: "transcriptome.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // preprocess transcripts fasta if gtf is in gencode format + | preprocess_transcripts_fasta.run ( + runIf: {id, state -> state.transcript_fasta && state.gencode}, + fromState: [ "transcript_fasta": "transcript_fasta" ], + toState: [ "transcript_fasta": "output" ], + args: [output: "transcriptome.fasta"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // make transcript FASTA if not provided + | rsem_prepare_reference.run ( + runIf: {id, state -> !state.transcript_fasta}, + fromState: [ + "reference_fasta_files": "fasta", + "gtf": "gtf" + ], + toState: [ "make_transcript_fasta_output": "output" ], + key: "make_transcript_fasta", + args: [reference_name: "genome"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + | map { id, state -> + def transcript_fasta = (!state.transcript_fasta) ? + state.make_transcript_fasta_output.listFiles().find{it.name == "genome.transcripts.fa"} : + state.transcript_fasta + [ id, state + [transcript_fasta: transcript_fasta] ] + } + + // chromosome size and fai index + | getchromsizes.run ( + fromState: [ "fasta": "fasta" ], + toState: [ + "fai": "fai", + "sizes": "sizes" + ], + key: "chromsizes", + args: [ + fai: "genome_additional.fasta.fai", + sizes: "genome_additional.fasta.sizes" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // untar bbsplit index, if available + | untar.run ( + runIf: {id, state -> state.bbsplit_index}, + fromState: [ "input": "bbsplit_index" ], + toState: [ "bbsplit_index": "output" ], + key: "untar_bbsplit_index", + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | map { id, state -> + // Check if bbsplit_fasta_list is defined + def ref = (state.bbsplit_fasta_list) ? + [state.fasta] + state.bbsplit_fasta_list : + [state.fasta] + [id, state + [bbsplit_ref: ref] ] + } + + // create bbsplit index, if not already available + | bbmap_bbsplit.run ( + runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, + fromState: ["ref": "bbsplit_ref"], + toState: [ "bbsplit_index": "index" ], + args: [ + only_build_index: true, + index: "BBSplit_index" + ], + key: "generate_bbsplit_index" + ) + + // Uncompress STAR index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.star_index}, + fromState: [ "input": "star_index" ], + toState: [ "star_index": "output" ], + key: "untar_star_index", + args: [output: "STAR_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | star_genome_generate.run ( + runIf: {id, state -> !state.star_index && !state.skip_alignment}, + fromState: [ + "genome_fasta_files": "fasta", + "sjdb_gtf_file": "gtf", + "sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ "star_index": "index" ], + key: "generate_star_index", + args: [index: "STAR_index"], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + // Uncompress RSEM index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.rsem_index}, + fromState: [ "input": "rsem_index" ], + toState: [ "rsem_index": "output" ], + key: "untar_rsem_index", + args: [output: "RSEM_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | rsem_prepare_reference.run ( + runIf: {id, state -> !state.rsem_index && state.aligner == 'star_rsem'}, + fromState: [ + "reference_fasta_files": "fasta", + "gtf": "gtf" + ], + toState: [ "rsem_index": "output" ], + key: "generate_rsem_index", + args: [reference_name: "genome"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // TODO: Uncompress HISAT2 index or generate from scratch if required + + // Uncompress Salmon index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.salmon_index}, + fromState: [ "input": "salmon_index" ], + toState: [ "salmon_index": "output" ], + key: "untar_salmon_index", + args: [output: "Salmon_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | salmon_index.run ( + runIf: {id, state -> (state.aligner == 'star_salmon' || state.pseudo_aligner == "salmon") && !state.salmon_index}, + fromState: [ + "genome": "fasta", + "transcripts": "transcript_fasta", + "kmer_len": "pseudo_aligner_kmer_size", + "gencode": "gencode" + ], + toState: [ "salmon_index": "index" ], + key: "generate_salmon_index", + args: [index: "Salmon_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // Uncompress Kallisto index or generate from scratch if required + | untar.run ( + runIf: {id, state -> state.kallisto_index}, + fromState: [ "input": "kallisto_index" ], + toState: [ "kallisto_index": "output" ], + key: "untar_kallisto_index", + args: [output: "Kallisto_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | kallisto_index.run( + runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, + fromState: [ + "input": "transcript_fasta", + "kmer_size": "pseudo_aligner_kmer_size" + ], + toState: [ "kallisto_index": "index" ], + key: "generate_kallisto_index", + args: [index: "Kallisto_index"], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + "fasta_uncompressed": "fasta", + "gtf_uncompressed": "gtf", + "transcript_fasta_uncompressed": "transcript_fasta", + "gene_bed_uncompressed": "gene_bed", + "star_index_uncompressed": "star_index", + "salmon_index_uncompressed": "salmon_index", + "kallisto_index_uncompressed": "kallisto_index", + "bbsplit_index_uncompressed": "bbsplit_index", + "rsem_index_uncompressed": "rsem_index", + "chrom_sizes": "sizes", + "fai": "fai" + ) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/prepare_genome/nextflow.config b/target/nextflow/workflows/prepare_genome/nextflow.config new file mode 100644 index 0000000..243fec5 --- /dev/null +++ b/target/nextflow/workflows/prepare_genome/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/prepare_genome' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A subworkflow for preparing all the required genome references\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/prepare_genome/nextflow_labels.config b/target/nextflow/workflows/prepare_genome/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/prepare_genome/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/prepare_genome/nextflow_schema.json b/target/nextflow/workflows/prepare_genome/nextflow_schema.json new file mode 100644 index 0000000..aee6f59 --- /dev/null +++ b/target/nextflow/workflows/prepare_genome/nextflow_schema.json @@ -0,0 +1,423 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "prepare_genome", +"description": "A subworkflow for preparing all the required genome references\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "fasta": { + "type": + "string", + "description": "Type: `file`, required. Path to FASTA genome file", + "help_text": "Type: `file`, required. Path to FASTA genome file." + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. Path to GTF annotation file", + "help_text": "Type: `file`. Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified." + + } + + + , + "gff": { + "type": + "string", + "description": "Type: `file`. Path to GFF3 annotation file", + "help_text": "Type: `file`. Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + + } + + + , + "additional_fasta": { + "type": + "string", + "description": "Type: `file`. FASTA file to concatenate to genome FASTA file e", + "help_text": "Type: `file`. FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences." + + } + + + , + "transcript_fasta": { + "type": + "string", + "description": "Type: `file`. Path to FASTA transcriptome file", + "help_text": "Type: `file`. Path to FASTA transcriptome file." + + } + + + , + "gene_bed": { + "type": + "string", + "description": "Type: `file`. Path to BED file containing gene intervals", + "help_text": "Type: `file`. Path to BED file containing gene intervals. This will be created from the GTF file if not specified." + + } + + + , + "splicesites": { + "type": + "string", + "description": "Type: `file`. Splice sites file required for HISAT2", + "help_text": "Type: `file`. Splice sites file required for HISAT2." + + } + + + , + "skip_bbsplit": { + "type": + "boolean", + "description": "Type: `boolean`. Skip BBSplit for removal of non-reference genome reads", + "help_text": "Type: `boolean`. Skip BBSplit for removal of non-reference genome reads." + + } + + + , + "bbsplit_fasta_list": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. List of reference genomes (separated by \";\") to filter reads against with BBSplit", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. List of reference genomes (separated by \";\") to filter reads against with BBSplit." + + } + + + , + "star_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built STAR index." + + } + + + , + "star_sjdb_gtf_feature_exon": { + "type": + "string", + "description": "Type: `string`. Feature type in GTF file to be used as exons for building transcripts", + "help_text": "Type: `string`. Feature type in GTF file to be used as exons for building transcripts" + + } + + + , + "rsem_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built RSEM index." + + } + + + , + "salmon_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Salmon index." + + } + + + , + "kallisto_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Kallisto index." + + } + + + , + "bbsplit_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built BBSplit index." + + } + + + , + "pseudo_aligner_kmer_size": { + "type": + "integer", + "description": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners", + "help_text": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners." + , + "default":31 + } + + + , + "gencode": { + "type": + "boolean", + "description": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format", + "help_text": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format." + + } + + + , + "biotype": { + "type": + "string", + "description": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided", + "help_text": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided." + + } + + + , + "filter_gtf": { + "type": + "boolean", + "description": "Type: `boolean`. Whether to filter the GTF or not?", + "help_text": "Type: `boolean`. Whether to filter the GTF or not?" + + } + + + , + "aligner": { + "type": + "string", + "description": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027", + "help_text": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027.", + "enum": ["star_salmon", "star_rsem", "hisat2"] + + , + "default":"star_salmon" + } + + + , + "pseudo_aligner": { + "type": + "string", + "description": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027", + "help_text": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027. Runs in addition to \u0027--aligner\u0027.", + "enum": ["salmon", "kallisto"] + + , + "default":"salmon" + } + + + , + "skip_alignment": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip all of the alignment-based processes within the pipeline", + "help_text": "Type: `boolean_true`, default: `false`. Skip all of the alignment-based processes within the pipeline." + , + "default":false + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "fasta_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `reference_genome.fasta`. ", + "help_text": "Type: `file`, default: `reference_genome.fasta`. " + , + "default":"reference_genome.fasta" + } + + + , + "gtf_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `gene_annotation.gtf`. ", + "help_text": "Type: `file`, default: `gene_annotation.gtf`. " + , + "default":"gene_annotation.gtf" + } + + + , + "transcript_fasta_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `transcriptome.fasta`. ", + "help_text": "Type: `file`, default: `transcriptome.fasta`. " + , + "default":"transcriptome.fasta" + } + + + , + "gene_bed_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `gene_annotation.bed`. ", + "help_text": "Type: `file`, default: `gene_annotation.bed`. " + , + "default":"gene_annotation.bed" + } + + + , + "star_index_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `STAR_index`. Path to STAR index", + "help_text": "Type: `file`, default: `STAR_index`. Path to STAR index." + , + "default":"STAR_index" + } + + + , + "rsem_index_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `RSEM_index`. Path to directory or tar", + "help_text": "Type: `file`, default: `RSEM_index`. Path to directory or tar.gz archive for pre-built RSEM index." + , + "default":"RSEM_index" + } + + + , + "salmon_index_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `Salmon_index`. Path to Salmon index", + "help_text": "Type: `file`, default: `Salmon_index`. Path to Salmon index." + , + "default":"Salmon_index" + } + + + , + "kallisto_index_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `Kallisto_index`. Path to Kallisto index", + "help_text": "Type: `file`, default: `Kallisto_index`. Path to Kallisto index." + , + "default":"Kallisto_index" + } + + + , + "bbsplit_index_uncompressed": { + "type": + "string", + "description": "Type: `file`, default: `BBSplit_index`. Path to BBSplit index", + "help_text": "Type: `file`, default: `BBSplit_index`. Path to BBSplit index." + , + "default":"BBSplit_index" + } + + + , + "chrom_sizes": { + "type": + "string", + "description": "Type: `file`, default: `reference_genome.fasta.sizes`. File containing chromosome lengths", + "help_text": "Type: `file`, default: `reference_genome.fasta.sizes`. File containing chromosome lengths" + , + "default":"reference_genome.fasta.sizes" + } + + + , + "fai": { + "type": + "string", + "description": "Type: `file`, default: `reference_genome.fasta.fai`. FASTA index file", + "help_text": "Type: `file`, default: `reference_genome.fasta.fai`. FASTA index file" + , + "default":"reference_genome.fasta.fai" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml b/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml new file mode 100644 index 0000000..a99269b --- /dev/null +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml @@ -0,0 +1,328 @@ +name: "pseudo_alignment_and_quant" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + alternatives: + - "-i" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, or\ + \ reverse" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Fasta file of the reference transcriptome." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Specifies the pseudo aligner to use - available options are 'salmon'.\ + \ Runs in addition to '--aligner'." + info: null + default: + - "false" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Salmon index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index" + description: "Kallisto index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--lib_type" + description: "Override library type inferred based on strandedness defined in\ + \ meta object" + info: null + default: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length" + description: "For single-end mode only, the estimated average fragment length\ + \ to use for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length_sd" + description: "For single-end mode only, the estimated standard deviation of the\ + \ fragment length for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--pseudo_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_out_dir" + info: null + default: + - "$id.quant" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_results_file" + info: null + default: + - "$id.quant.sf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_quant_results_file" + info: null + default: + - "$id.abundance.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash sub-workflow for pseudo alignment and quantification stage of\ + \ nf-core/rnaseq pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "salmon/salmon_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "kallisto/kallisto_quant" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/pseudo_alignment_and_quant/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/pseudo_alignment_and_quant" + executable: "target/nextflow/workflows/pseudo_alignment_and_quant/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf b/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf new file mode 100644 index 0000000..49b3b70 --- /dev/null +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf @@ -0,0 +1,3672 @@ +// pseudo_alignment_and_quant v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "pseudo_alignment_and_quant", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "ID of the sample.", + "example" : [ + "foo" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_1", + "alternatives" : [ + "-i" + ], + "description" : "Path to the sample (or read 1 of paired end sample).", + "example" : [ + "input.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_2", + "description" : "Path to read 2 of the sample.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity. Must be one of unstranded, forward, or reverse", + "required" : false, + "choices" : [ + "forward", + "reverse", + "unstranded" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "GTF file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcript_fasta", + "description" : "Fasta file of the reference transcriptome.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--pseudo_aligner", + "description" : "Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'.", + "default" : [ + "false" + ], + "required" : false, + "choices" : [ + "salmon", + "kallisto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_index", + "description" : "Salmon index", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--kallisto_index", + "description" : "Kallisto index", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--lib_type", + "description" : "Override library type inferred based on strandedness defined in meta object", + "default" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--kallisto_quant_fragment_length", + "description" : "For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--kallisto_quant_fragment_length_sd", + "description" : "For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--pseudo_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_out_dir", + "default" : [ + "$id.quant" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_quant_results_file", + "default" : [ + "$id.quant.sf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--kallisto_quant_results_file", + "default" : [ + "$id.abundance.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "salmon/salmon_quant", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "kallisto/kallisto_quant", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/pseudo_alignment_and_quant", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { salmon_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/salmon/salmon_quant/main.nf" +include { kallisto_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/kallisto/kallisto_quant/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired, input: input ] ] + } + + + // Infer lib-type for salmon quant + | map { id, state -> + def lib_type = (state.paired) ? + ( + (state.strandedness == "forward") ? + "ISF" : + ( + (state.strandedness == "reverse") ? "ISR" : "IU" + ) + ) + : ( + (state.strandedness == "forward") ? + "SF" : + ( + (state.strandedness == "reverse") ? "SR" : "U" + ) + ) + [ id, state + [lib_type: lib_type] ] + } + + // Count reads from BAM alignments using Salmon + | salmon_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'salmon' }, + fromState: { id, state -> + def unmated_reads = !state.paired ? state.fastq_1 : null + def mates1 = state.paired ? state.fastq_1 : null + def mates2 = state.paired ? state.fastq_2 : null + [ unmated_reads: unmated_reads, + mates1: mates1, + mates2: mates2, + gene_map: state.gtf, + index: state.salmon_index, + lib_type: state.lib_type ] + }, + toState: [ + "quant_out_dir": "output", + "salmon_quant_results_file": "quant_results" + ], + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = (state.pseudo_aligner == 'salmon') ? state + [pseudo_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | kallisto_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, + fromState: { id, state -> + def fr_stranded = state.strandedness == 'forward' + def rf_stranded = state.strandedness == 'reverse' + [ + input: state.input, + index: state.kallisto_index, + fragment_length: state.kallisto_quant_fragment_length, + sd: state.kallisto_quant_fragment_length_sd, + single: !state.paired, + fr_stranded: fr_stranded, + rf_stranded: rf_stranded, + ] + }, + args: [log: "kallisto_quant.log"], + toState: { id, output_state, state -> + def neKeys = [ + "quant_out_dir": output_state["output_dir"], + "kallisto_quant_results_file": output_state["output_dir"] + "/abundance.tsv", + "pseudo_multiqc": output_state["log"] + ] + def new_state = state + newKeys + return new_state + }, + directives: [ label: [ "midmem", "highcpu" ] ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ "pseudo_multiqc": "quant_results", + "quant_out_dir": "quant_out_dir", + "salmon_quant_results_file": "salmon_quant_results_file", + "kallisto_quant_results_file": "kallisto_quant_results_file" + ] + ) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow.config b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow.config new file mode 100644 index 0000000..2b80b67 --- /dev/null +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/pseudo_alignment_and_quant' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_labels.config b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json new file mode 100644 index 0000000..904aed2 --- /dev/null +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json @@ -0,0 +1,244 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "pseudo_alignment_and_quant", +"description": "A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`, required, example: `foo`. ID of the sample", + "help_text": "Type: `string`, required, example: `foo`. ID of the sample." + + } + + + , + "fastq_1": { + "type": + "string", + "description": "Type: `file`, required, example: `input.fastq.gz`. Path to the sample (or read 1 of paired end sample)", + "help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the sample (or read 1 of paired end sample)." + + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: `file`. Path to read 2 of the sample", + "help_text": "Type: `file`. Path to read 2 of the sample." + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity", + "help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity. Must be one of unstranded, forward, or reverse", + "enum": ["forward", "reverse", "unstranded"] + + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. GTF file", + "help_text": "Type: `file`. GTF file" + + } + + + , + "transcript_fasta": { + "type": + "string", + "description": "Type: `file`. Fasta file of the reference transcriptome", + "help_text": "Type: `file`. Fasta file of the reference transcriptome." + + } + + + , + "pseudo_aligner": { + "type": + "string", + "description": "Type: `string`, default: `false`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027", + "help_text": "Type: `string`, default: `false`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027. Runs in addition to \u0027--aligner\u0027.", + "enum": ["salmon", "kallisto"] + + , + "default":"false" + } + + + , + "salmon_index": { + "type": + "string", + "description": "Type: `file`. Salmon index", + "help_text": "Type: `file`. Salmon index" + + } + + + , + "kallisto_index": { + "type": + "string", + "description": "Type: `file`. Kallisto index", + "help_text": "Type: `file`. Kallisto index" + + } + + + , + "lib_type": { + "type": + "string", + "description": "Type: `string`, default: ``. Override library type inferred based on strandedness defined in meta object", + "help_text": "Type: `string`, default: ``. Override library type inferred based on strandedness defined in meta object" + , + "default":"" + } + + + , + "kallisto_quant_fragment_length": { + "type": + "number", + "description": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto." + + } + + + , + "kallisto_quant_fragment_length_sd": { + "type": + "number", + "description": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "pseudo_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.pseudo_multiqc`. ", + "help_text": "Type: `file`, default: `$id.$key.pseudo_multiqc`. " + , + "default":"$id.$key.pseudo_multiqc" + } + + + , + "quant_out_dir": { + "type": + "string", + "description": "Type: `file`, default: `$id.quant`. ", + "help_text": "Type: `file`, default: `$id.quant`. " + , + "default":"$id.quant" + } + + + , + "salmon_quant_results_file": { + "type": + "string", + "description": "Type: `file`, default: `$id.quant.sf`. ", + "help_text": "Type: `file`, default: `$id.quant.sf`. " + , + "default":"$id.quant.sf" + } + + + , + "kallisto_quant_results_file": { + "type": + "string", + "description": "Type: `file`, default: `$id.abundance.tsv`. ", + "help_text": "Type: `file`, default: `$id.abundance.tsv`. " + , + "default":"$id.abundance.tsv" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/quality_control/.config.vsh.yaml b/target/nextflow/workflows/quality_control/.config.vsh.yaml new file mode 100644 index 0000000..3208662 --- /dev/null +++ b/target/nextflow/workflows/quality_control/.config.vsh.yaml @@ -0,0 +1,1615 @@ +name: "quality_control" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample" + info: null + example: + - "test" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse" + info: null + default: + - "unstranded" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--paired" + description: "Paired-end reads?" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam" + description: "Input genome BAM file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + description: "Genome BAM index file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed" + description: "Path to BED file containing gene intervals. This will be created\ + \ from the GTF file if not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "GTF file" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + description: "Define the attribute type used to group features in the GTF file\ + \ when running Salmon." + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_extra_attributes" + description: "By default, the pipeline uses the gene_name field to obtain additional\ + \ gene identifiers from the input GTF file when running Salmon." + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_out_dir" + description: "Directory containing Salmon quantification results." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_results_file" + description: "Salmon quantification file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_out_dir" + description: "Directory containing quantification results for pseudo alignment." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_salmon_quant_results_file" + description: "Quantification file from Salmon for pseudo alignment." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_kallisto_quant_results_file" + description: "Quantification file from Kallisto for pseudo alignment." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--aligner" + description: "Method used for alognment and qqunatification." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Method used for pseudo alignment and quantification." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_gene" + description: "Expression counts on gene level" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_transcripts" + description: "Expression counts on transcript level" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_qc" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_biotype_qc" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_align" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_pseudo_align" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_dupradar" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_qualimap" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_rseqc" + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_multiqc" + info: null + direction: "input" + - type: "boolean" + name: "--skip_preseq" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_preseq_args" + info: null + default: + - "-verbose -bam -seed 1" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_group_type" + description: "The attribute type used to group feature types in the GTF file when\ + \ generating the biotype plot with featureCounts." + info: null + default: + - "gene_biotype" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_feature_type" + description: "By default, the pipeline assigns reads based on the 'exon' attribute\ + \ within the GTF file." + info: null + default: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--gencode" + description: "Specify if the GTF annotation is in GENCODE format." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--biotypes_header" + info: null + default: + - "src/assets/multiqc/biotypes_header.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--biotype" + description: "Biotype value to use while appending entries to GTF file when additional\ + \ fasta file is provided." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--rseqc_modules" + description: "Specify the RSeQC modules to run_wf" + info: null + default: + - "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + required: false + choices: + - "bam_stat" + - "inner_distance" + - "infer_experiment" + - "junction_annotation" + - "junction_saturation" + - "read_distribution" + - "read_duplication" + - "tin" + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--sample_size" + description: "Numer of reads sampled from SAM/BAM file to infer experiment and\ + \ calculate inner distance, default = 200000." + info: null + default: + - 200000 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--lower_bound_size" + description: "Lower bound of inner distance (bp). This option is used for ploting\ + \ histograme, default = -250." + info: null + default: + - -250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--upper_bound_size" + description: "Upper bound of inner distance (bp). This option is used for ploting\ + \ histograme, default = 250." + info: null + default: + - 250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--step_size" + description: "Step size (bp) of histograme of inner distance. This option is used\ + \ for plotting histogram, default = 5." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--map_qual" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads, default = 30." + info: null + default: + - 30 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_intron" + description: "Minimum intron length (bp) to call a junction, default = 50." + info: null + default: + - 50 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_splice_read" + description: "Minimum number of supporting reads to call a junction, default =\ + \ 1." + info: null + default: + - 1 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_lower_bound" + description: "Read sampling for junction saturation starts from this percentile,\ + \ must be an integer between 0 and 100, default = 5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_upper_bound" + description: "Read sampling for junction saturation ends at this percentile, must\ + \ be an integer between 0 and 100, default = 5." + info: null + default: + - 100 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sampling_percentile_step" + description: "Read sampling for junction saturation frequency in %. Smaller value\ + \ means more sampling times. Must be an integer between 0 and 100, default =\ + \ 5." + info: null + default: + - 5 + required: false + min: 0 + max: 100 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_count_upper_limit" + description: "Upper limit of reads' occurence to determine read duplication. Only\ + \ used for plotting, default = 500 (times)." + info: null + default: + - 500 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--minimum_coverage" + description: "Minimum number of reads mapped to a transcript to determin tin,\ + \ default = 10." + info: null + default: + - 10 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--tin_sample_size" + description: "Number of equal-spaced nucleotide positions picked from mRNA. Note,\ + \ if this number is larger than the length of mRNA (L), it will be halved until\ + \ it's smaller than L (default = 100)" + info: null + default: + - 100 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--subtract_background" + description: "Set flag to subtract background noise (estimated from intronic reads)\ + \ to determine tin. Only use this option if there are substantial intronic reads." + info: null + direction: "input" + - type: "integer" + name: "--pr_bases" + description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\ + \ bias for qualimap (default = 100)." + info: null + default: + - 100 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--tr_bias" + description: "Number of top highly expressed transcripts to compute 5'-3' bias\ + \ for qualimap (default = 1000)." + info: null + default: + - 1000 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--algorithm" + description: "Counting algorithm for qualimap (uniquely-mapped-reads (default)\ + \ or proportional)." + info: null + default: + - "uniquely-mapped-reads" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sequencing_protocol" + description: "Sequencing library protocol for qualimap (strand-specific-forward,\ + \ strand-specific-reverse or non-strand-specific (default))." + info: null + default: + - "non-strand-specific" + required: false + choices: + - "non-strand-specific" + - "strand-specific-reverse" + - "strand-specific-forward" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sorted" + description: "Setting this flag indicates that the input file is already sorted\ + \ by name. If flag is not set, additional sorting by name will be performed\ + \ for qualimap. Only requiredfor paired-end analysis." + info: null + direction: "input" + - type: "string" + name: "--java_memory_size" + description: "maximum Java heap memory size for qualimap, default = 4G." + info: null + default: + - "4G" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_deseq2_qc" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--deseq2_vst" + description: "Use vst transformation instead of rlog with DESeq2" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_custom_config" + description: "Custom multiqc configuration file\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--multiqc_title" + description: "Custom multiqc title\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_methods_description" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--passed_trimmed_reads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--num_trimmed_reads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--passed_mapping" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--percent_mapped" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_1" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_2" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_1" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_2" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_1" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_2" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sortmerna_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_multiqc" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--markduplicates_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_multiqc" + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--preseq_output" + info: null + default: + - "$id.lc_extrap.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bamstat_output" + description: "Path to output file (txt) of mapping quality statistics" + info: null + default: + - "$id.mapping_quality.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--strandedness_output" + description: "Path to output report (txt) of inferred strandedness" + info: null + default: + - "$id.strandedness.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_stats" + description: "output file (txt) with summary statistics of inner distances of\ + \ paired reads" + info: null + default: + - "$id.inner_distance.stats" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_dist" + description: "output file (txt) with inner distances of all paired reads" + info: null + default: + - "$id.inner_distance.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_freq" + description: "output file (txt) with frequencies of inner distances of all paired\ + \ reads" + info: null + default: + - "$id.inner_distance_freq.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot" + description: "output file (pdf) with histogram plot of of inner distances of all\ + \ paired reads" + info: null + default: + - "$id.inner_distance_plot.pdf" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot_r" + description: "output file (R) with script of histogram plot of of inner distances\ + \ of all paired reads" + info: null + default: + - "$id.inner_distance_plot.r" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_log" + description: "output log of junction annotation script" + info: null + default: + - "$id.junction_annotation.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_plot_r" + description: "R script to generate splice_junction and splice_events plot" + info: null + default: + - "$id.junction_annotation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_bed" + description: "junction annotation file (bed format)" + info: null + default: + - "$id.junction_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_interact" + description: "interact file (bed format) of junctions. Can be uploaded to UCSC\ + \ genome browser or converted to bigInteract (using bedToBigBed program) for\ + \ visualization." + info: null + default: + - "$id.junction_annotation.Interact.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_sheet" + description: "junction annotation file (xls format)" + info: null + default: + - "$id.junction_annotation.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_events_plot" + description: "plot of splice events (pdf)" + info: null + default: + - "$id.splice_events.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_junctions_plot" + description: "plot of junctions (pdf)" + info: null + default: + - "$id.splice_junctions_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot_r" + description: "r script to generate junction_saturation_plot plot" + info: null + default: + - "$id.junction_saturation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot" + description: "plot of junction saturation (pdf" + info: null + default: + - "$id.junction_saturation_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_distribution_output" + description: "output file (txt) of read distribution analysis." + info: null + default: + - "$id.read_distribution.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot_r" + description: "R script for generating duplication rate plot" + info: null + default: + - "$id.duplication_rate_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot" + description: "duplication rate plot (pdf)" + info: null + default: + - "$id.duplication_rate_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_mapping" + description: "Summary of mapping-based read duplication" + info: null + default: + - "$id.duplication_rate_mapping.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_sequence" + description: "Summary of sequencing-based read duplication" + info: null + default: + - "$id.duplication_rate_sequencing.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_summary" + description: "summary statistics (txt) of calculated TIN metrics" + info: null + default: + - "$id.tin_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_metrics" + description: "file with TIN metrics (xls)" + info: null + default: + - "$id.tin.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dupmatrix" + description: "path to output file (txt) of duplicate tag counts" + info: null + default: + - "$id.dup_matrix.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dup_intercept_mqc" + description: "path to output file (txt) of multiqc intercept value DupRadar" + info: null + default: + - "$id.dup_intercept_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_boxplot" + description: "path to output file (pdf) of distribution of expression box plot" + info: null + default: + - "$id.duprate_exp_boxplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_densplot" + description: "path to output file (pdf) of 2D density scatter plot of duplicate\ + \ tag counts" + info: null + default: + - "$id.duprate_exp_densityplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_denscurve_mqc" + description: "path to output file (pdf) of density curve of gene duplication multiqc" + info: null + default: + - "$id.duprate_exp_density_curve_mqc.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_expression_histogram" + description: "path to output file (pdf) of distribution of RPK values per gene\ + \ histogram" + info: null + default: + - "$id.expression_hist.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_intercept_slope" + info: null + default: + - "$id.intercept_slope.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." + info: null + example: + - "$id.rnaseq_qc_results.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_counts" + description: "Output file for computed counts." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." + info: null + example: + - "$id.report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output" + info: null + default: + - "deseq2" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output_pseudo" + info: null + default: + - "deseq2_pseudo" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_report" + info: null + default: + - "multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_data" + info: null + default: + - "multiqc_data" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_plots" + info: null + default: + - "multiqc_plots" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts" + info: null + default: + - "$id.featureCounts.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_summary" + info: null + default: + - "$id.featureCounts.txt.summary" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_multiqc" + info: null + default: + - "$id.featureCounts_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + default: + - "$id.featureCounts_rrna_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_gene" + info: null + default: + - "salmon.merged.gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + default: + - "salmon.merged.gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + default: + - "salmon.merged.gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + default: + - "salmon.merged.gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + default: + - "salmon.merged.transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + default: + - "salmon.merged.transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_merged_summarizedexperiment" + info: null + default: + - "salmon_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_gene" + info: null + default: + - "pseudo_gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene" + info: null + default: + - "pseudo_gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_length_scaled" + info: null + default: + - "pseudo_gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_scaled" + info: null + default: + - "pseudo_gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_transcript" + info: null + default: + - "pseudo_transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_transcript" + info: null + default: + - "pseudo_transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_merged_summarizedexperiment" + info: null + default: + - "pseudo_quant_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A subworkflow for the final quality control stage of the nf-core/rnaseq\ + \ pipeline.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "rseqc/rseqc_bamstat" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rseqc/rseqc_inferexperiment" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rseqc/rseqc_inner_distance" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rseqc/rseqc_junctionannotation" + repository: + type: "local" +- name: "rseqc/rseqc_junctionsaturation" + repository: + type: "local" +- name: "rseqc/rseqc_readdistribution" + repository: + type: "local" +- name: "rseqc/rseqc_readduplication" + repository: + type: "local" +- name: "rseqc/rseqc_tin" + repository: + type: "local" +- name: "dupradar" + repository: + type: "local" +- name: "qualimap/qualimap_rnaseq" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "preseq_lcextrap" + repository: + type: "local" +- name: "featurecounts" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "multiqc_custom_biotype" + repository: + type: "local" +- name: "deseq2_qc" + repository: + type: "local" +- name: "prepare_multiqc_input" + repository: + type: "local" +- name: "multiqc" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "rsem_merge_counts" + repository: + type: "local" +- name: "workflows/merge_quant_results" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/quality_control/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/quality_control" + executable: "target/nextflow/workflows/quality_control/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance" + - "target/nextflow/rseqc/rseqc_junctionannotation" + - "target/nextflow/rseqc/rseqc_junctionsaturation" + - "target/nextflow/rseqc/rseqc_readdistribution" + - "target/nextflow/rseqc/rseqc_readduplication" + - "target/nextflow/rseqc/rseqc_tin" + - "target/nextflow/dupradar" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq" + - "target/nextflow/preseq_lcextrap" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts" + - "target/nextflow/multiqc_custom_biotype" + - "target/nextflow/deseq2_qc" + - "target/nextflow/prepare_multiqc_input" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc" + - "target/nextflow/rsem_merge_counts" + - "target/nextflow/workflows/merge_quant_results" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/quality_control/main.nf b/target/nextflow/workflows/quality_control/main.nf new file mode 100644 index 0000000..749a59e --- /dev/null +++ b/target/nextflow/workflows/quality_control/main.nf @@ -0,0 +1,5891 @@ +// quality_control v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "quality_control", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "ID of the sample", + "example" : [ + "test" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity. Must be one of unstranded, forward, reverse", + "default" : [ + "unstranded" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--paired", + "description" : "Paired-end reads?", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam", + "description" : "Input genome BAM file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_index", + "description" : "Genome BAM index file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gene_bed", + "description" : "Path to BED file containing gene intervals. This will be created from the GTF file if not specified.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "GTF file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_group_features", + "description" : "Define the attribute type used to group features in the GTF file when running Salmon.", + "default" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_extra_attributes", + "description" : "By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon.", + "default" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_out_dir", + "description" : "Directory containing Salmon quantification results.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_results_file", + "description" : "Salmon quantification file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_quant_out_dir", + "description" : "Directory containing quantification results for pseudo alignment.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_salmon_quant_results_file", + "description" : "Quantification file from Salmon for pseudo alignment.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_kallisto_quant_results_file", + "description" : "Quantification file from Kallisto for pseudo alignment.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--aligner", + "description" : "Method used for alognment and qqunatification.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--pseudo_aligner", + "description" : "Method used for pseudo alignment and quantification.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_counts_gene", + "description" : "Expression counts on gene level", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_counts_transcripts", + "description" : "Expression counts on transcript level", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--skip_qc", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_biotype_qc", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_align", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_pseudo_align", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_dupradar", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_qualimap", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_rseqc", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_multiqc", + "direction" : "input" + }, + { + "type" : "boolean", + "name" : "--skip_preseq", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_preseq_args", + "default" : [ + "-verbose -bam -seed 1" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--featurecounts_group_type", + "description" : "The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts.", + "default" : [ + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--featurecounts_feature_type", + "description" : "By default, the pipeline assigns reads based on the 'exon' attribute within the GTF file.", + "default" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--gencode", + "description" : "Specify if the GTF annotation is in GENCODE format.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--biotypes_header", + "default" : [ + "src/assets/multiqc/biotypes_header.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--biotype", + "description" : "Biotype value to use while appending entries to GTF file when additional fasta file is provided.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--rseqc_modules", + "description" : "Specify the RSeQC modules to run_wf", + "default" : [ + "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + ], + "required" : false, + "choices" : [ + "bam_stat", + "inner_distance", + "infer_experiment", + "junction_annotation", + "junction_saturation", + "read_distribution", + "read_duplication", + "tin" + ], + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sample_size", + "description" : "Numer of reads sampled from SAM/BAM file to infer experiment and calculate inner distance, default = 200000.", + "default" : [ + 200000 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--lower_bound_size", + "description" : "Lower bound of inner distance (bp). This option is used for ploting histograme, default = -250.", + "default" : [ + -250 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--upper_bound_size", + "description" : "Upper bound of inner distance (bp). This option is used for ploting histograme, default = 250.", + "default" : [ + 250 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--step_size", + "description" : "Step size (bp) of histograme of inner distance. This option is used for plotting histogram, default = 5.", + "default" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--map_qual", + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default = 30.", + "default" : [ + 30 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_intron", + "description" : "Minimum intron length (bp) to call a junction, default = 50.", + "default" : [ + 50 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_splice_read", + "description" : "Minimum number of supporting reads to call a junction, default = 1.", + "default" : [ + 1 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sampling_percentile_lower_bound", + "description" : "Read sampling for junction saturation starts from this percentile, must be an integer between 0 and 100, default = 5.", + "default" : [ + 5 + ], + "required" : false, + "min" : 0, + "max" : 100, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sampling_percentile_upper_bound", + "description" : "Read sampling for junction saturation ends at this percentile, must be an integer between 0 and 100, default = 5.", + "default" : [ + 100 + ], + "required" : false, + "min" : 0, + "max" : 100, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sampling_percentile_step", + "description" : "Read sampling for junction saturation frequency in %. Smaller value means more sampling times. Must be an integer between 0 and 100, default = 5.", + "default" : [ + 5 + ], + "required" : false, + "min" : 0, + "max" : 100, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_count_upper_limit", + "description" : "Upper limit of reads' occurence to determine read duplication. Only used for plotting, default = 500 (times).", + "default" : [ + 500 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--minimum_coverage", + "description" : "Minimum number of reads mapped to a transcript to determin tin, default = 10.", + "default" : [ + 10 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--tin_sample_size", + "description" : "Number of equal-spaced nucleotide positions picked from mRNA. Note, if this number is larger than the length of mRNA (L), it will be halved until it's smaller than L (default = 100)", + "default" : [ + 100 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--subtract_background", + "description" : "Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--pr_bases", + "description" : "Number of upstream/downstream nucleotide bases to compute 5'-3' bias for qualimap (default = 100).", + "default" : [ + 100 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--tr_bias", + "description" : "Number of top highly expressed transcripts to compute 5'-3' bias for qualimap (default = 1000).", + "default" : [ + 1000 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--algorithm", + "description" : "Counting algorithm for qualimap (uniquely-mapped-reads (default) or proportional).", + "default" : [ + "uniquely-mapped-reads" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sequencing_protocol", + "description" : "Sequencing library protocol for qualimap (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).", + "default" : [ + "non-strand-specific" + ], + "required" : false, + "choices" : [ + "non-strand-specific", + "strand-specific-reverse", + "strand-specific-forward" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sorted", + "description" : "Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed for qualimap. Only requiredfor paired-end analysis.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--java_memory_size", + "description" : "maximum Java heap memory size for qualimap, default = 4G.", + "default" : [ + "4G" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_deseq2_qc", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--deseq2_vst", + "description" : "Use vst transformation instead of rlog with DESeq2", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_custom_config", + "description" : "Custom multiqc configuration file\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--multiqc_title", + "description" : "Custom multiqc title\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_methods_description", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--passed_trimmed_reads", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--num_trimmed_reads", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--passed_mapping", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--percent_mapped", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_zip_1", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_zip_2", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_zip_1", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_zip_2", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_log_1", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_log_2", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sortmerna_multiqc", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_multiqc", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_multiqc", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_stats", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_flagstat", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_idxstats", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--markduplicates_multiqc", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_multiqc", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--preseq_output", + "default" : [ + "$id.lc_extrap.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bamstat_output", + "description" : "Path to output file (txt) of mapping quality statistics", + "default" : [ + "$id.mapping_quality.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--strandedness_output", + "description" : "Path to output report (txt) of inferred strandedness", + "default" : [ + "$id.strandedness.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_stats", + "description" : "output file (txt) with summary statistics of inner distances of paired reads", + "default" : [ + "$id.inner_distance.stats" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_dist", + "description" : "output file (txt) with inner distances of all paired reads", + "default" : [ + "$id.inner_distance.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_freq", + "description" : "output file (txt) with frequencies of inner distances of all paired reads", + "default" : [ + "$id.inner_distance_freq.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_plot", + "description" : "output file (pdf) with histogram plot of of inner distances of all paired reads", + "default" : [ + "$id.inner_distance_plot.pdf" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_plot_r", + "description" : "output file (R) with script of histogram plot of of inner distances of all paired reads", + "default" : [ + "$id.inner_distance_plot.r" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_log", + "description" : "output log of junction annotation script", + "default" : [ + "$id.junction_annotation.log" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_plot_r", + "description" : "R script to generate splice_junction and splice_events plot", + "default" : [ + "$id.junction_annotation_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_junction_bed", + "description" : "junction annotation file (bed format)", + "default" : [ + "$id.junction_annotation.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_junction_interact", + "description" : "interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization.", + "default" : [ + "$id.junction_annotation.Interact.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_junction_sheet", + "description" : "junction annotation file (xls format)", + "default" : [ + "$id.junction_annotation.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_splice_events_plot", + "description" : "plot of splice events (pdf)", + "default" : [ + "$id.splice_events.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_splice_junctions_plot", + "description" : "plot of junctions (pdf)", + "default" : [ + "$id.splice_junctions_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_saturation_output_plot_r", + "description" : "r script to generate junction_saturation_plot plot", + "default" : [ + "$id.junction_saturation_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_saturation_output_plot", + "description" : "plot of junction saturation (pdf", + "default" : [ + "$id.junction_saturation_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_distribution_output", + "description" : "output file (txt) of read distribution analysis.", + "default" : [ + "$id.read_distribution.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_plot_r", + "description" : "R script for generating duplication rate plot", + "default" : [ + "$id.duplication_rate_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_plot", + "description" : "duplication rate plot (pdf)", + "default" : [ + "$id.duplication_rate_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_mapping", + "description" : "Summary of mapping-based read duplication", + "default" : [ + "$id.duplication_rate_mapping.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_sequence", + "description" : "Summary of sequencing-based read duplication", + "default" : [ + "$id.duplication_rate_sequencing.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tin_output_summary", + "description" : "summary statistics (txt) of calculated TIN metrics", + "default" : [ + "$id.tin_summary.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tin_output_metrics", + "description" : "file with TIN metrics (xls)", + "default" : [ + "$id.tin.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_dupmatrix", + "description" : "path to output file (txt) of duplicate tag counts", + "default" : [ + "$id.dup_matrix.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_dup_intercept_mqc", + "description" : "path to output file (txt) of multiqc intercept value DupRadar", + "default" : [ + "$id.dup_intercept_mqc.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_duprate_exp_boxplot", + "description" : "path to output file (pdf) of distribution of expression box plot", + "default" : [ + "$id.duprate_exp_boxplot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_duprate_exp_densplot", + "description" : "path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "default" : [ + "$id.duprate_exp_densityplot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_duprate_exp_denscurve_mqc", + "description" : "path to output file (pdf) of density curve of gene duplication multiqc", + "default" : [ + "$id.duprate_exp_density_curve_mqc.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_expression_histogram", + "description" : "path to output file (pdf) of distribution of RPK values per gene histogram", + "default" : [ + "$id.expression_hist.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_intercept_slope", + "default" : [ + "$id.intercept_slope.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_qc_report", + "description" : "Text file containing the RNAseq QC results.", + "example" : [ + "$id.rnaseq_qc_results.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_counts", + "description" : "Output file for computed counts.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_report", + "description" : "Report output file. Supported formats are PDF or HTML.", + "example" : [ + "$id.report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--deseq2_output", + "default" : [ + "deseq2" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--deseq2_output_pseudo", + "default" : [ + "deseq2_pseudo" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_report", + "default" : [ + "multiqc_report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_data", + "default" : [ + "multiqc_data" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_plots", + "default" : [ + "multiqc_plots" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts", + "default" : [ + "$id.featureCounts.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts_summary", + "default" : [ + "$id.featureCounts.txt.summary" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts_multiqc", + "default" : [ + "$id.featureCounts_mqc.tsv" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts_rrna_multiqc", + "default" : [ + "$id.featureCounts_rrna_mqc.tsv" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tpm_gene", + "default" : [ + "salmon.merged.gene_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene", + "default" : [ + "salmon.merged.gene_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_length_scaled", + "default" : [ + "salmon.merged.gene_counts_length_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_scaled", + "default" : [ + "salmon.merged.gene_counts_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tpm_transcript", + "default" : [ + "salmon.merged.transcript_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcript", + "default" : [ + "salmon.merged.transcript_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_merged_summarizedexperiment", + "default" : [ + "salmon_merged_summarizedexperiment" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_tpm_gene", + "default" : [ + "pseudo_gene_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_gene", + "default" : [ + "pseudo_gene_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_gene_length_scaled", + "default" : [ + "pseudo_gene_counts_length_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_gene_scaled", + "default" : [ + "pseudo_gene_counts_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_tpm_transcript", + "default" : [ + "pseudo_transcript_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_transcript", + "default" : [ + "pseudo_transcript_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_quant_merged_summarizedexperiment", + "default" : [ + "pseudo_quant_merged_summarizedexperiment" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A subworkflow for the final quality control stage of the nf-core/rnaseq pipeline.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "rseqc/rseqc_bamstat", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "rseqc/rseqc_inferexperiment", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "rseqc/rseqc_inner_distance", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "rseqc/rseqc_junctionannotation", + "repository" : { + "type" : "local" + } + }, + { + "name" : "rseqc/rseqc_junctionsaturation", + "repository" : { + "type" : "local" + } + }, + { + "name" : "rseqc/rseqc_readdistribution", + "repository" : { + "type" : "local" + } + }, + { + "name" : "rseqc/rseqc_readduplication", + "repository" : { + "type" : "local" + } + }, + { + "name" : "rseqc/rseqc_tin", + "repository" : { + "type" : "local" + } + }, + { + "name" : "dupradar", + "repository" : { + "type" : "local" + } + }, + { + "name" : "qualimap/qualimap_rnaseq", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "preseq_lcextrap", + "repository" : { + "type" : "local" + } + }, + { + "name" : "featurecounts", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "multiqc_custom_biotype", + "repository" : { + "type" : "local" + } + }, + { + "name" : "deseq2_qc", + "repository" : { + "type" : "local" + } + }, + { + "name" : "prepare_multiqc_input", + "repository" : { + "type" : "local" + } + }, + { + "name" : "multiqc", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "rsem_merge_counts", + "repository" : { + "type" : "local" + } + }, + { + "name" : "workflows/merge_quant_results", + "repository" : { + "type" : "local" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/quality_control/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/quality_control", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { rseqc_bamstat } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_bamstat/main.nf" +include { rseqc_inferexperiment } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inferexperiment/main.nf" +include { rseqc_inner_distance } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/rseqc/rseqc_inner_distance/main.nf" +include { rseqc_junctionannotation } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_junctionannotation/main.nf" +include { rseqc_junctionsaturation } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_junctionsaturation/main.nf" +include { rseqc_readdistribution } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_readdistribution/main.nf" +include { rseqc_readduplication } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_readduplication/main.nf" +include { rseqc_tin } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_tin/main.nf" +include { dupradar } from "${meta.resources_dir}/../../../nextflow/dupradar/main.nf" +include { qualimap_rnaseq } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/qualimap/qualimap_rnaseq/main.nf" +include { preseq_lcextrap } from "${meta.resources_dir}/../../../nextflow/preseq_lcextrap/main.nf" +include { featurecounts } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/featurecounts/main.nf" +include { multiqc_custom_biotype } from "${meta.resources_dir}/../../../nextflow/multiqc_custom_biotype/main.nf" +include { deseq2_qc } from "${meta.resources_dir}/../../../nextflow/deseq2_qc/main.nf" +include { prepare_multiqc_input } from "${meta.resources_dir}/../../../nextflow/prepare_multiqc_input/main.nf" +include { multiqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf" +include { rsem_merge_counts } from "${meta.resources_dir}/../../../nextflow/rsem_merge_counts/main.nf" +include { merge_quant_results } from "${meta.resources_dir}/../../../nextflow/workflows/merge_quant_results/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + + take: + input_ch + + main: + + qc_ch = input_ch + + // temporary fix to force assignment when alignment is skipped + | map {it} + + // Feature biotype QC using featureCounts + | map { id, state -> + def biotype_in_gtf = biotypeInGtf(state.gtf, state.biotype) + def attribute_type = state.gencode ? "gene_type" : state.featurecounts_group_type + def strand = (state.strandedness == "forward") ? 1 : ((state.strandedness == "reverse") ? 2 : 0) + [ id, state + [biotype_in_gtf: biotype_in_gtf, attribute_type: attribute_type, strand: strand] ] + } + + | featurecounts.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.biotype_in_gtf && !state.skip_align }, + fromState: [ + "paired": "paired", + "strand": "strand", + "annotation": "gtf", + "input": "genome_bam", + "attribute_type": "attribute_type", + "feature_type": "featurecounts_feature_type", + "count_read_pairs": "paired" + ], + toState: [ + "featurecounts": "counts", + "featurecounts_summary": "summary" + ], + args: [ + both_aligned: true, + same_strand: true + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | multiqc_custom_biotype.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, + fromState: [ + "id": "id", + "biocounts": "featurecounts", + "biotypes_header": "biotypes_header" + ], + toState: [ + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | preseq_lcextrap.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, + fromState: [ + "paired": "paired", + "input": "genome_bam", + "extra_preseq_args": "extra_preseq_args" + ], + toState: [ "preseq_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | rseqc_bamstat.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "mapq": "map_qual" + ], + toState: [ "bamstat_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_inferexperiment.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual" + ], + toState: [ "strandedness_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + // Get predicted strandedness from the RSeQC infer_experiment.py output + | map { id, state -> + def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) + def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true + [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] + } + | rseqc_inner_distance.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, + key: "inner_distance", + args: ["output_prefix": "output"], + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual", + "lower_bound": "lower_bound_size", + "upper_bound": "upper_bound_size", + "step": "step_size", + ], + toState: [ + "inner_dist_output_stats": "output_stats", + "inner_dist_output_dist": "output_dist", + "inner_dist_output_freq": "output_freq", + "inner_dist_output_plot": "output_plot", + "inner_dist_output_plot_r": "output_plot_r" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_junctionannotation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "map_qual": "map_qual", + "min_intron": "min_intron" + ], + toState: [ + "junction_annotation_output_log": "output_log", + "junction_annotation_output_plot_r": "output_plot_r", + "junction_annotation_output_junction_bed": "output_junction_bed", + "junction_annotation_output_junction_interact": "output_junction_interact", + "junction_annotation_output_junction_sheet": "output_junction_sheet", + "junction_annotation_output_splice_events_plot": "output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_junctionsaturation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", + "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", + "sampling_percentile_step": "sampling_percentile_step", + "min_intron": "min_intron", + "min_splice_read": "min_splice_read", + "map_qual": "map_qual" + ], + toState: [ + "junction_saturation_output_plot_r": "output_plot_r", + "junction_saturation_output_plot": "output_plot" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_readdistribution.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + ], + toState: [ "read_distribution_output": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_readduplication.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "read_count_upper_limit": "read_count_upper_limit", + "map_qual": "map_qual" + ], + toState: [ + "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + | rseqc_tin.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "bam_input": "genome_bam", + "bai_input": "genome_bam_index", + "refgene": "gene_bed", + "minimum_coverage": "minimum_coverage", + "sample_size": "tin_sample_size", + "subtract_background": "subtract_background" + ], + toState: [ + "tin_output_summary": "output_tin_summary", + "tin_output_metrics": "output_tin" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | dupradar.run( + runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align }, + fromState: [ + "id": "id", + "input": "genome_bam", + "gtf_annotation": "gtf", + "paired": "paired", + "strandedness": "strandedness" + ], + toState: [ + "dupradar_output_dupmatrix": "output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "output_expression_histogram", + "dupradar_output_intercept_slope": "output_intercept_slope" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // TODO: Add outdir as an output argument to the qualimap module on biobox. + // Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost. + | qualimap_rnaseq.run( + runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align }, + fromState: [ + "bam": "genome_bam", + "gtf": "gtf", + "num_pr_bases": "pr_bases", + "num_tr_bias": "tr_bias", + "algorithm": "algorithm", + "sequencing_protocol": "sequencing_protocol", + "sorted": "sorted", + "java_memory_size": "java_memory_size", + ], + toState: [ + "qualimap_report": "report", + "qualimap_qc_report": "qc_report", + "qualimap_counts": "counts" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + merged_ch = qc_ch + | toSortedList + | map { list -> + def ids = list.collect { id, state -> id } + def strandedness = list.collect { id, state -> state.strandedness } + def num_trimmed_reads = list.collect { id, state -> state.num_trimmed_reads } + def passed_trimmed_reads = list.collect { id, state -> state.passed_trimmed_reads } + def passed_mapping = list.collect { id, state -> state.passed_mapping } + def percent_mapped = list.collect { id, state -> state.percent_mapped } + def inferred_strand = list.collect { id, state -> state.inferred_strand } + def passed_strand_check = list.collect { id, state -> state.passed_strand_check } + def gtf = list.collect { id, state -> state.gtf }.unique()[0] + def gtf_extra_attributes = list.collect { id, state -> state.gtf_extra_attributes }.unique()[0] + def gtf_group_features = list.collect { id, state -> state.gtf_group_features }.unique()[0] + def pca_header_multiqc = list.collect { id, state -> state.pca_header_multiqc }.unique()[0] + def clustering_header_multiqc = list.collect { id, state -> state.clustering_header_multiqc }.unique()[0] + def aligner = list.collect { id, state -> state.aligner }.unique()[0] + def pseudo_aligner = list.collect { id, state -> state.pseudo_aligner }.unique()[0] + def deseq2_vst = list.collect { id, state -> state.deseq2_vst }.unique()[0] + def extra_deseq2_args = list.collect { id, state -> state.extra_deseq2_args }.unique()[0] + def extra_deseq2_args2 = list.collect { id, state -> state.extra_deseq2_args2 }.unique()[0] + def skip_deseq2_qc = list.collect { id, state -> state.skip_deseq2_qc }.unique()[0] + def skip_qc = list.collect { id, state -> state.skip_qc }.unique()[0] + def skip_align = list.collect { id, state -> state.skip_align }.unique()[0] + def skip_pseudo_align = list.collect { id, state -> state.skip_pseudo_align }.unique()[0] + def quant_results = list.collect { id, state -> + (state.quant_results_file instanceof java.nio.file.Path && state.quant_results_file.exists()) ? + state.quant_results_file : + null } + def rsem_counts_gene = list.collect { id, state -> + (state.rsem_counts_gene instanceof java.nio.file.Path && state.rsem_counts_gene.exists()) ? + state.rsem_counts_gene : + null } + def rsem_counts_transcripts = list.collect { id, state -> + (state.rsem_counts_transcripts instanceof java.nio.file.Path && state.rsem_counts_transcripts.exists()) ? + state.rsem_counts_transcripts : + null } + def pseudo_quant_out_dir = list.collect { id, state -> + (state.pseudo_quant_out_dir instanceof java.nio.file.Path && state.pseudo_quant_out_dir.exists()) ? + state.pseudo_quant_out_dir : + null } + def pseudo_salmon_quant_results = list.collect { id, state -> + (state.pseudo_salmon_quant_results_file instanceof java.nio.file.Path && state.pseudo_salmon_quant_results_file.exists()) ? + state.pseudo_salmon_quant_results_file : + null } + def pseudo_kallisto_quant_results = list.collect { id, state -> + (state.pseudo_kallisto_quant_results_file instanceof java.nio.file.Path && state.pseudo_kallisto_quant_results_file.exists()) ? + state.pseudo_kallisto_quant_results_file : + null } + def fastqc_zip_1 = list.collect { id, state -> + (state.fastqc_zip_1 instanceof java.nio.file.Path && state.fastqc_zip_1.exists()) ? + state.fastqc_zip_1 : + null } + def fastqc_zip_2 = list.collect { id, state -> + (state.fastqc_zip_2 instanceof java.nio.file.Path && state.fastqc_zip_2.exists()) ? + state.fastqc_zip_2 : + null } + def trim_zip_1 = list.collect { id, state -> + (state.trim_zip_1 instanceof java.nio.file.Path && state.trim_zip_1.exists()) ? + state.trim_zip_1 : + null } + def trim_zip_2 = list.collect { id, state -> + (state.trim_zip_2 instanceof java.nio.file.Path && state.trim_zip_2.exists()) ? + state.trim_zip_2 : + null } + def trim_log_1 = list.collect { id, state -> + (state.trim_log_1 instanceof java.nio.file.Path && state.trim_log_1.exists()) ? + state.trim_log_1 : + null } + def trim_log_2 = list.collect { id, state -> + (state.trim_log_2 instanceof java.nio.file.Path && state.trim_log_2.exists()) ? + state.trim_log_2 : + null } + def sortmerna_multiqc = list.collect { id, state -> + (state.sortmerna_multiqc instanceof java.nio.file.Path && state.sortmerna_multiqc.exists()) ? + state.sortmerna_multiqc : + null } + def star_multiqc = list.collect { id, state -> + (state.star_multiqc instanceof java.nio.file.Path && state.star_multiqc.exists()) ? + state.star_multiqc : + null } + def genome_bam_stats = list.collect { id, state -> + (state.genome_bam_stats instanceof java.nio.file.Path && state.genome_bam_stats.exists()) ? + state.genome_bam_stats : + null } + def genome_bam_flagstat = list.collect { id, state -> + (state.genome_bam_flagstat instanceof java.nio.file.Path && state.genome_bam_flagstat.exists()) ? + state.genome_bam_flagstat : + null } + def genome_bam_idxstats = list.collect { id, state -> + (state.genome_bam_idxstats instanceof java.nio.file.Path && state.genome_bam_idxstats.exists()) ? + state.genome_bam_idxstats : + null } + def markduplicates_multiqc = list.collect { id, state -> + (state.markduplicates_multiqc instanceof java.nio.file.Path && state.markduplicates_multiqc.exists()) ? + state.markduplicates_multiqc : + null } + def salmon_multiqc = list.collect { id, state -> + (state.salmon_multiqc instanceof java.nio.file.Path && state.salmon_multiqc.exists()) ? + state.salmon_multiqc : + null } + def rsem_multiqc = list.collect { id, state -> + (state.rsem_multiqc instanceof java.nio.file.Path && state.rsem_multiqc.exists()) ? + state.rsem_multiqc : + null } + def pseudo_multiqc = list.collect { id, state -> + (state.pseudo_multiqc instanceof java.nio.file.Path && state.pseudo_multiqc.exists()) ? + state.pseudo_multiqc : + null } + def featurecounts_multiqc = list.collect { id, state -> + (state.featurecounts_multiqc instanceof java.nio.file.Path && state.featurecounts_multiqc.exists()) ? + state.featurecounts_multiqc : + null } + def featurecounts_rrna_multiqc = list.collect { id, state -> + (state.featurecounts_rrna_multiqc instanceof java.nio.file.Path && state.featurecounts_rrna_multiqc.exists()) ? + state.featurecounts_rrna_multiqc : + null } + def preseq_output = list.collect { id, state -> + (state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ? + state.preseq_output : + null } + // def qualimap_output_dir = list.collect { id, state -> + // (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? + // state.qualimap_output_dir : + // null } + def dupradar_output_dup_intercept_mqc = list.collect { id, state -> + (state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ? + state.dupradar_output_dup_intercept_mqc : + null } + def dupradar_output_duprate_exp_denscurve_mqc = list.collect { id, state -> + (state.dupradar_output_duprate_exp_denscurve_mqc instanceof java.nio.file.Path && state.dupradar_output_duprate_exp_denscurve_mqc.exists()) ? + state.dupradar_output_duprate_exp_denscurve_mqc : + null } + def bamstat_output = list.collect { id, state -> + (state.bamstat_output instanceof java.nio.file.Path && state.bamstat_output.exists()) ? + state.bamstat_output : + null } + def inferexperiment_multiqc = list.collect { id, state -> + (state.strandedness_output instanceof java.nio.file.Path && state.strandedness_output.exists()) ? + state.strandedness_output : + null } + def inner_dist_output_freq = list.collect { id, state -> + (state.inner_dist_output_freq instanceof java.nio.file.Path && state.inner_dist_output_freq.exists()) ? + state.inner_dist_output_freq : + null } + def junction_annotation_output_log = list.collect { id, state -> + (state.junction_annotation_output_log instanceof java.nio.file.Path && state.junction_annotation_output_log.exists()) ? + state.junction_annotation_output_log : + null } + def junction_saturation_output_plot_r = list.collect { id, state -> + (state.junction_saturation_output_plot_r instanceof java.nio.file.Path && state.junction_saturation_output_plot_r.exists()) ? + state.junction_saturation_output_plot_r : + null } + def read_distribution_output = list.collect { id, state -> + (state.read_distribution_output instanceof java.nio.file.Path && state.read_distribution_output.exists()) ? + state.read_distribution_output : + null } + def read_duplication_output_duplication_rate_mapping = list.collect { id, state -> + (state.read_duplication_output_duplication_rate_mapping instanceof java.nio.file.Path && state.read_duplication_output_duplication_rate_mapping.exists()) ? + state.read_duplication_output_duplication_rate_mapping : + null } + def tin_output_summary = list.collect { id, state -> + (state.tin_output_summary instanceof java.nio.file.Path && state.tin_output_summary.exists()) ? + state.tin_output_summary : + null } + def multiqc_custom_config = list.collect { id, state -> state.multiqc_custom_config }.unique()[0] + + ["merged", + [ + ids: ids, + strandedness: strandedness, + num_trimmed_reads: num_trimmed_reads, + passed_trimmed_reads: passed_trimmed_reads, + passed_mapping: passed_mapping, + percent_mapped: percent_mapped, + inferred_strand: inferred_strand, + passed_strand_check: passed_strand_check, + skip_align: skip_align, + skip_pseudo_align: skip_pseudo_align, + quant_results: quant_results.findAll { it != null }, + rsem_counts_gene: rsem_counts_gene.findAll { it != null }, + rsem_counts_transcripts: rsem_counts_transcripts.findAll { it != null }, + pseudo_quant_out_dir: pseudo_quant_out_dir.findAll { it != null }, + pseudo_salmon_quant_results: pseudo_salmon_quant_results.findAll { it != null }, + pseudo_kallisto_quant_results: pseudo_kallisto_quant_results.findAll { it != null }, + gtf: gtf, + gtf_extra_attributes: gtf_extra_attributes, + gtf_group_features: gtf_group_features, + pca_header_multiqc: pca_header_multiqc, + clustering_header_multiqc: clustering_header_multiqc, + aligner: aligner, + pseudo_aligner: pseudo_aligner, + deseq2_vst: deseq2_vst, + extra_deseq2_args: extra_deseq2_args, + extra_deseq2_args2: extra_deseq2_args2, + skip_deseq2_qc: skip_deseq2_qc, + fastqc_zip: fastqc_zip_1 + fastqc_zip_2, + trim_zip: trim_zip_1 + trim_zip_2, + trim_log: trim_log_1 + trim_log_2, + sortmerna_multiqc: sortmerna_multiqc, + star_multiqc: star_multiqc, + genome_bam_stats: genome_bam_stats, + genome_bam_flagstat: genome_bam_flagstat, + genome_bam_idxstats: genome_bam_idxstats, + markduplicates_multiqc: markduplicates_multiqc, + salmon_multiqc: salmon_multiqc, + rsem_multiqc: rsem_multiqc, + pseudo_multiqc: pseudo_multiqc, + featurecounts_multiqc: featurecounts_multiqc, + featurecounts_rrna_multiqc: featurecounts_rrna_multiqc, + preseq_output: preseq_output, + // qualimap_output_dir: qualimap_output_dir, + dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc, + dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc, + bamstat_output: bamstat_output, + inner_dist_output_freq: inner_dist_output_freq, + inferexperiment_multiqc: inferexperiment_multiqc, + junction_annotation_output_log: junction_annotation_output_log, + junction_saturation_output_plot_r: junction_saturation_output_plot_r, + read_distribution_output: read_distribution_output, + read_duplication_output_duplication_rate_mapping: read_duplication_output_duplication_rate_mapping, + tin_output_summary: tin_output_summary, + multiqc_custom_config: multiqc_custom_config + ] + ] + } + + // Merge quantification results of alignment + | merge_quant_results.run ( + runIf: { id, state -> !state.skip_align && state.aligner == 'star_salmon' }, + fromState: [ + "salmon_quant_results": "quant_results", + "gtf": "gtf", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf_group_features": "gtf_group_features" + ], + args: [ quant_type: "salmon"], + toState: [ + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "lengths_gene": "lengths_gene", + "lengths_transcript": "lengths_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" + ], + key: "merge_quant_results", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | rsem_merge_counts.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts" + ], + toState: [ + "tpm_gene": "merged_gene_tpm", + "counts_gene": "merged_gene_counts", + "tpm_transcript": "merged_transcript_tpm", + "counts_transcript": "merged_transcript_counts" + ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | deseq2_qc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_align }, + fromState: { id, state -> + def counts = (state.aligner == "star_rsem") ? state.counts_gene : state.counts_gene_length_scaled + [ + counts: counts, + vst: state.deseq2_vst, + label: state.aligner + ] + }, + args: [count_col: 3, id_col: 1, outprefix: "deseq2"], + toState: [ + "deseq2_output": "outdir", + "deseq2_pca_multiqc": "pca_multiqc", + "deseq2_dists_multiqc": "dists_multiqc" + ], + key: "deseq2_qc_align_quant", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Merge quantification results of pseudo alignment + | merge_quant_results.run ( + runIf: { id, state -> !state.skip_pseudo_align }, + fromState: [ + "salmon_quant_results": "pseudo_salmon_quant_results", + "kallisto_quant_results": "pseudo_kallisto_quant_results", + "gtf": "gtf", + "gtf_extra_attributes": "gtf_extra_attributes", + "gtf_group_features": "gtf_group_features", + "quant_type": "pseudo_aligner" + ], + toState: [ + "pseudo_tpm_gene": "tpm_gene", + "pseudo_counts_gene": "counts_gene", + "pseudo_counts_gene_length_scaled": "counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "counts_gene_scaled", + "pseudo_tpm_transcript": "tpm_transcript", + "pseudo_counts_transcript": "counts_transcript", + "pseudo_lengths_gene": "lengths_gene", + "pseudo_lengths_transcript": "lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" + ], + key: "merge_pseudo_quant_results", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | deseq2_qc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_pseudo_align }, + fromState: [ + "counts": "pseudo_counts_gene_length_scaled", + "vst": "deseq2_vst", + "label": "pseudo_aligner" + ], + args: [count_col: 3, id_col: 1, outprefix: "deseq2"], + toState: [ + "deseq2_output_pseudo": "outdir", + "deseq2_pca_multiqc_pseudo": "pca_multiqc", + "deseq2_dists_multiqc_pseudo": "dists_multiqc" + ], + key: "deseq2_qc_pseuso_align_quant", + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + // Get list of samples that failed trimming, mapping, and strand check for MultiQC report + | map { id, state -> + def fail_trimming_header = ["Sample", "Reads after trimming"] + def fail_trimming_multiqc = "" + def star_mapping_header = ["Sample", "STAR uniquely mapped reads (%)"] + def fail_mapping_multiqc = "" + def strand_check_header = ["Sample", "Provided strandedness", "Inferred strandedness", "Sense (%)", "Antisense (%)", "Undetermined (%)"] + def fail_strand_multiqc = "" + if (state.ids.size() > 0) { + fail_trimming_multiqc += "${fail_trimming_header.join('\t')}\n" + fail_mapping_multiqc += "${star_mapping_header.join('\t')}\n" + fail_strand_multiqc += "${strand_check_header.join('\t')}\n" + for (i=0; i + state.each { key, value -> + if (value instanceof ArrayList) { + value.removeAll { it == null } + } + } + mod_state = state.findAll { key, value -> value != null } + [ id, mod_state ] + } + + | prepare_multiqc_input.run( + runIf: { id, state -> !state.skip_qc && !state.skip_multiqc }, + fromState: [ + "fail_trimming_multiqc": "fail_trimming_multiqc", + "fail_mapping_multiqc": "fail_mapping_multiqc", + "fail_strand_multiqc": "fail_strand_multiqc", + "fastqc_raw_multiqc": "fastqc_zip", + "fastqc_trim_multiqc": "trim_zip", + "trim_log_multiqc": "trim_log", + "sortmerna_multiqc": "sortmerna_multiqc", + "star_multiqc": "star_multiqc", + "salmon_multiqc": "salmon_multiqc", + "rsem_multiqc": "rsem_multiqc", + "pseudo_multiqc": "pseudo_multiqc", + "samtools_stats": "genome_bam_stats", + "samtools_flagstat": "genome_bam_flagstat", + "samtools_idxstats": "genome_bam_idxstats", + "markduplicates_multiqc": "markduplicates_multiqc", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "aligner_pca_multiqc": "deseq2_pca_multiqc", + "aligner_clustering_multiqc": "deseq2_dists_multiqc", + "pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo", + "pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo", + "preseq_multiqc": "preseq_output", + // "qualimap_multiqc": "qualimap_output_dir", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "bamstat_multiqc": "bamstat_output", + "inferexperiment_multiqc": "inferexperiment_multiqc", + "innerdistance_multiqc": "inner_dist_output_freq", + "junctionannotation_multiqc": "junction_annotation_output_log", + "junctionsaturation_multiqc": "junction_saturation_output_plot_r", + "readdistribution_multiqc": "read_distribution_output", + "readduplication_multiqc": "read_duplication_output_duplication_rate_mapping", + "tin_multiqc": "tin_output_summary", + "multiqc_config": "multiqc_custom_config" + ], + toState: [ "multiqc_input": "output" ], + directives: [ label: [ "midmem", "midcpu" ] ] + ) + + | multiqc.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_multiqc }, + fromState: [ + "title": "multiqc_title", + "input": "multiqc_input", + ], + args: [exclude_modules: "general_stats"], + toState: [ + "multiqc_report": "output_report", + "multiqc_data": "output_data", + "multiqc_plots": "output_plots" + ], + directives: [ label: [ "highmem", "highcpu" ] ] + ) + + | map { id, state -> + [ + id, [ + tpm_gene: state.tpm_gene, + counts_gene: state.counts_gene, + counts_gene_length_scaled: state.counts_gene_length_scaled, + counts_gene_scaled: state.counts_gene_scaled, + tpm_transcript: state.tpm_transcript, + counts_transcript: state.counts_transcript, + quant_merged_summarizedexperiment: state.quant_merged_summarizedexperiment, + deseq2_output: state.deseq2_output, + pseudo_tpm_gene: state.pseudo_tpm_gene, + pseudo_counts_gene: state.pseudo_counts_gene, + pseudo_counts_gene_length_scaled: state.pseudo_counts_gene_length_scaled, + pseudo_counts_gene_scaled: state.pseudo_counts_gene_scaled, + pseudo_tpm_transcript: state.pseudo_tpm_transcript, + pseudo_counts_transcript: state.pseudo_counts_transcript, + pseudo_quant_merged_summarizedexperiment: state.pseudo_quant_merged_summarizedexperiment, + deseq2_output_pseudo: state.deseq2_output_pseudo, + multiqc_report: state.multiqc_report, + multiqc_data: state.multiqc_data, + multiqc_plots: state.multiqc_plots + ] + ] + } + + | map { list -> list[1]} + + output_ch = qc_ch + + | combine(merged_ch) + + | map { list -> [list[0], list[1] + list[2]] } + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "lengths_gene": "lengths_gene", + "lengths_transcript": "lengths_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + emit: + output_ch +} + +// +// Function to check whether biotype field exists in GTF file +// +def biotypeInGtf(gtf_file, biotype) { + def hits = 0 + gtf_file.eachLine { line -> + def attributes = line.split('\t')[-1].split() + if (attributes.contains(biotype)) { + hits += 1 + } + } + if (hits) { + return true + } else { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " Amend '--featurecounts_group_type' to change this behaviour.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + return false + } +} + +// +// Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output +// +def getInferexperimentStrandedness(inferexperiment_file, cutoff=30) { + def sense = 0 + def antisense = 0 + def undetermined = 0 + inferexperiment_file.eachLine { line -> + def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/ + def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/ + def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/ + def pe_sense_matcher = line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/ + def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/ + if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100 + if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100 + if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100 + if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100 + if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100 + } + def strandedness = 'unstranded' + if (sense >= 100-cutoff) { + strandedness = 'forward' + } else if (antisense >= 100-cutoff) { + strandedness = 'reverse' + } + return [ strandedness, sense, antisense, undetermined ] +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/quality_control/nextflow.config b/target/nextflow/workflows/quality_control/nextflow.config new file mode 100644 index 0000000..98d2404 --- /dev/null +++ b/target/nextflow/workflows/quality_control/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/quality_control' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A subworkflow for the final quality control stage of the nf-core/rnaseq pipeline.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/quality_control/nextflow_labels.config b/target/nextflow/workflows/quality_control/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/quality_control/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/quality_control/nextflow_schema.json b/target/nextflow/workflows/quality_control/nextflow_schema.json new file mode 100644 index 0000000..0357263 --- /dev/null +++ b/target/nextflow/workflows/quality_control/nextflow_schema.json @@ -0,0 +1,1510 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "quality_control", +"description": "A subworkflow for the final quality control stage of the nf-core/rnaseq pipeline.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`, required, example: `test`. ID of the sample", + "help_text": "Type: `string`, required, example: `test`. ID of the sample" + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, default: `unstranded`. Sample strand-specificity", + "help_text": "Type: `string`, default: `unstranded`. Sample strand-specificity. Must be one of unstranded, forward, reverse" + , + "default":"unstranded" + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean`. Paired-end reads?", + "help_text": "Type: `boolean`. Paired-end reads?" + + } + + + , + "genome_bam": { + "type": + "string", + "description": "Type: `file`. Input genome BAM file", + "help_text": "Type: `file`. Input genome BAM file" + + } + + + , + "genome_bam_index": { + "type": + "string", + "description": "Type: `file`. Genome BAM index file", + "help_text": "Type: `file`. Genome BAM index file" + + } + + + , + "gene_bed": { + "type": + "string", + "description": "Type: `file`. Path to BED file containing gene intervals", + "help_text": "Type: `file`. Path to BED file containing gene intervals. This will be created from the GTF file if not specified." + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. GTF file", + "help_text": "Type: `file`. GTF file" + + } + + + , + "gtf_group_features": { + "type": + "string", + "description": "Type: `string`, default: `gene_id`. Define the attribute type used to group features in the GTF file when running Salmon", + "help_text": "Type: `string`, default: `gene_id`. Define the attribute type used to group features in the GTF file when running Salmon." + , + "default":"gene_id" + } + + + , + "gtf_extra_attributes": { + "type": + "string", + "description": "Type: `string`, default: `gene_name`. By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon", + "help_text": "Type: `string`, default: `gene_name`. By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon." + , + "default":"gene_name" + } + + + , + "quant_out_dir": { + "type": + "string", + "description": "Type: `file`. Directory containing Salmon quantification results", + "help_text": "Type: `file`. Directory containing Salmon quantification results." + + } + + + , + "quant_results_file": { + "type": + "string", + "description": "Type: `file`. Salmon quantification file", + "help_text": "Type: `file`. Salmon quantification file." + + } + + + , + "pseudo_quant_out_dir": { + "type": + "string", + "description": "Type: `file`. Directory containing quantification results for pseudo alignment", + "help_text": "Type: `file`. Directory containing quantification results for pseudo alignment." + + } + + + , + "pseudo_salmon_quant_results_file": { + "type": + "string", + "description": "Type: `file`. Quantification file from Salmon for pseudo alignment", + "help_text": "Type: `file`. Quantification file from Salmon for pseudo alignment." + + } + + + , + "pseudo_kallisto_quant_results_file": { + "type": + "string", + "description": "Type: `file`. Quantification file from Kallisto for pseudo alignment", + "help_text": "Type: `file`. Quantification file from Kallisto for pseudo alignment." + + } + + + , + "aligner": { + "type": + "string", + "description": "Type: `string`. Method used for alognment and qqunatification", + "help_text": "Type: `string`. Method used for alognment and qqunatification." + + } + + + , + "pseudo_aligner": { + "type": + "string", + "description": "Type: `string`. Method used for pseudo alignment and quantification", + "help_text": "Type: `string`. Method used for pseudo alignment and quantification." + + } + + + , + "rsem_counts_gene": { + "type": + "string", + "description": "Type: `file`. Expression counts on gene level", + "help_text": "Type: `file`. Expression counts on gene level" + + } + + + , + "rsem_counts_transcripts": { + "type": + "string", + "description": "Type: `file`. Expression counts on transcript level", + "help_text": "Type: `file`. Expression counts on transcript level" + + } + + + , + "skip_qc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_biotype_qc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_align": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_pseudo_align": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_dupradar": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_qualimap": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_rseqc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_multiqc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default":false + } + + + , + "skip_preseq": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. ", + "help_text": "Type: `boolean`, default: `false`. " + , + "default":false + } + + + , + "extra_preseq_args": { + "type": + "string", + "description": "Type: `string`, default: `-verbose -bam -seed 1`. ", + "help_text": "Type: `string`, default: `-verbose -bam -seed 1`. " + , + "default":"-verbose -bam -seed 1" + } + + + , + "featurecounts_group_type": { + "type": + "string", + "description": "Type: `string`, default: `gene_biotype`. The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts", + "help_text": "Type: `string`, default: `gene_biotype`. The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts." + , + "default":"gene_biotype" + } + + + , + "featurecounts_feature_type": { + "type": + "string", + "description": "Type: `string`, default: `exon`. By default, the pipeline assigns reads based on the \u0027exon\u0027 attribute within the GTF file", + "help_text": "Type: `string`, default: `exon`. By default, the pipeline assigns reads based on the \u0027exon\u0027 attribute within the GTF file." + , + "default":"exon" + } + + + , + "gencode": { + "type": + "boolean", + "description": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format", + "help_text": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format." + + } + + + , + "biotypes_header": { + "type": + "string", + "description": "Type: `file`, default: `src/assets/multiqc/biotypes_header.txt`. ", + "help_text": "Type: `file`, default: `src/assets/multiqc/biotypes_header.txt`. " + , + "default":"src/assets/multiqc/biotypes_header.txt" + } + + + , + "biotype": { + "type": + "string", + "description": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided", + "help_text": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided." + + } + + + , + "rseqc_modules": { + "type": + "string", + "description": "Type: List of `string`, default: `bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication`, multiple_sep: `\";\"`, choices: ``bam_stat`, `inner_distance`, `infer_experiment`, `junction_annotation`, `junction_saturation`, `read_distribution`, `read_duplication`, `tin``. Specify the RSeQC modules to run_wf", + "help_text": "Type: List of `string`, default: `bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication`, multiple_sep: `\";\"`, choices: ``bam_stat`, `inner_distance`, `infer_experiment`, `junction_annotation`, `junction_saturation`, `read_distribution`, `read_duplication`, `tin``. Specify the RSeQC modules to run_wf" + , + "default":"bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + } + + + , + "sample_size": { + "type": + "integer", + "description": "Type: `integer`, default: `200000`. Numer of reads sampled from SAM/BAM file to infer experiment and calculate inner distance, default = 200000", + "help_text": "Type: `integer`, default: `200000`. Numer of reads sampled from SAM/BAM file to infer experiment and calculate inner distance, default = 200000." + , + "default":200000 + } + + + , + "lower_bound_size": { + "type": + "integer", + "description": "Type: `integer`, default: `-250`. Lower bound of inner distance (bp)", + "help_text": "Type: `integer`, default: `-250`. Lower bound of inner distance (bp). This option is used for ploting histograme, default = -250." + , + "default":-250 + } + + + , + "upper_bound_size": { + "type": + "integer", + "description": "Type: `integer`, default: `250`. Upper bound of inner distance (bp)", + "help_text": "Type: `integer`, default: `250`. Upper bound of inner distance (bp). This option is used for ploting histograme, default = 250." + , + "default":250 + } + + + , + "step_size": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Step size (bp) of histograme of inner distance", + "help_text": "Type: `integer`, default: `5`. Step size (bp) of histograme of inner distance. This option is used for plotting histogram, default = 5." + , + "default":5 + } + + + , + "map_qual": { + "type": + "integer", + "description": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default = 30", + "help_text": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default = 30." + , + "default":30 + } + + + , + "min_intron": { + "type": + "integer", + "description": "Type: `integer`, default: `50`. Minimum intron length (bp) to call a junction, default = 50", + "help_text": "Type: `integer`, default: `50`. Minimum intron length (bp) to call a junction, default = 50." + , + "default":50 + } + + + , + "min_splice_read": { + "type": + "integer", + "description": "Type: `integer`, default: `1`. Minimum number of supporting reads to call a junction, default = 1", + "help_text": "Type: `integer`, default: `1`. Minimum number of supporting reads to call a junction, default = 1." + , + "default":1 + } + + + , + "sampling_percentile_lower_bound": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Read sampling for junction saturation starts from this percentile, must be an integer between 0 and 100, default = 5", + "help_text": "Type: `integer`, default: `5`. Read sampling for junction saturation starts from this percentile, must be an integer between 0 and 100, default = 5." + , + "default":5 + } + + + , + "sampling_percentile_upper_bound": { + "type": + "integer", + "description": "Type: `integer`, default: `100`. Read sampling for junction saturation ends at this percentile, must be an integer between 0 and 100, default = 5", + "help_text": "Type: `integer`, default: `100`. Read sampling for junction saturation ends at this percentile, must be an integer between 0 and 100, default = 5." + , + "default":100 + } + + + , + "sampling_percentile_step": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Read sampling for junction saturation frequency in %", + "help_text": "Type: `integer`, default: `5`. Read sampling for junction saturation frequency in %. Smaller value means more sampling times. Must be an integer between 0 and 100, default = 5." + , + "default":5 + } + + + , + "read_count_upper_limit": { + "type": + "integer", + "description": "Type: `integer`, default: `500`. Upper limit of reads\u0027 occurence to determine read duplication", + "help_text": "Type: `integer`, default: `500`. Upper limit of reads\u0027 occurence to determine read duplication. Only used for plotting, default = 500 (times)." + , + "default":500 + } + + + , + "minimum_coverage": { + "type": + "integer", + "description": "Type: `integer`, default: `10`. Minimum number of reads mapped to a transcript to determin tin, default = 10", + "help_text": "Type: `integer`, default: `10`. Minimum number of reads mapped to a transcript to determin tin, default = 10." + , + "default":10 + } + + + , + "tin_sample_size": { + "type": + "integer", + "description": "Type: `integer`, default: `100`. Number of equal-spaced nucleotide positions picked from mRNA", + "help_text": "Type: `integer`, default: `100`. Number of equal-spaced nucleotide positions picked from mRNA. Note, if this number is larger than the length of mRNA (L), it will be halved until it\u0027s smaller than L (default = 100)" + , + "default":100 + } + + + , + "subtract_background": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Set flag to subtract background noise (estimated from intronic reads) to determine tin", + "help_text": "Type: `boolean_true`, default: `false`. Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads." + , + "default":false + } + + + , + "pr_bases": { + "type": + "integer", + "description": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias for qualimap (default = 100)", + "help_text": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias for qualimap (default = 100)." + , + "default":100 + } + + + , + "tr_bias": { + "type": + "integer", + "description": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias for qualimap (default = 1000)", + "help_text": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias for qualimap (default = 1000)." + , + "default":1000 + } + + + , + "algorithm": { + "type": + "string", + "description": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm for qualimap (uniquely-mapped-reads (default) or proportional)", + "help_text": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm for qualimap (uniquely-mapped-reads (default) or proportional)." + , + "default":"uniquely-mapped-reads" + } + + + , + "sequencing_protocol": { + "type": + "string", + "description": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol for qualimap (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))", + "help_text": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol for qualimap (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).", + "enum": ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"] + + , + "default":"non-strand-specific" + } + + + , + "sorted": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name", + "help_text": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed for qualimap. Only requiredfor paired-end analysis." + , + "default":false + } + + + , + "java_memory_size": { + "type": + "string", + "description": "Type: `string`, default: `4G`. maximum Java heap memory size for qualimap, default = 4G", + "help_text": "Type: `string`, default: `4G`. maximum Java heap memory size for qualimap, default = 4G." + , + "default":"4G" + } + + + , + "skip_deseq2_qc": { + "type": + "boolean", + "description": "Type: `boolean`. ", + "help_text": "Type: `boolean`. " + + } + + + , + "deseq2_vst": { + "type": + "boolean", + "description": "Type: `boolean`. Use vst transformation instead of rlog with DESeq2", + "help_text": "Type: `boolean`. Use vst transformation instead of rlog with DESeq2" + + } + + + , + "multiqc_custom_config": { + "type": + "string", + "description": "Type: `file`. Custom multiqc configuration file\n", + "help_text": "Type: `file`. Custom multiqc configuration file\n" + + } + + + , + "multiqc_title": { + "type": + "string", + "description": "Type: `string`. Custom multiqc title\n", + "help_text": "Type: `string`. Custom multiqc title\n" + + } + + + , + "multiqc_methods_description": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "passed_trimmed_reads": { + "type": + "boolean", + "description": "Type: `boolean`. ", + "help_text": "Type: `boolean`. " + + } + + + , + "num_trimmed_reads": { + "type": + "number", + "description": "Type: `double`. ", + "help_text": "Type: `double`. " + + } + + + , + "passed_mapping": { + "type": + "boolean", + "description": "Type: `boolean`. ", + "help_text": "Type: `boolean`. " + + } + + + , + "percent_mapped": { + "type": + "number", + "description": "Type: `double`. ", + "help_text": "Type: `double`. " + + } + + + , + "fastqc_zip_1": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "fastqc_zip_2": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "trim_zip_1": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "trim_zip_2": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "trim_log_1": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "trim_log_2": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "sortmerna_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "star_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "rsem_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "genome_bam_stats": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "genome_bam_flagstat": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "genome_bam_idxstats": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "markduplicates_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + + , + "pseudo_multiqc": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "preseq_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.lc_extrap.txt`. ", + "help_text": "Type: `file`, default: `$id.lc_extrap.txt`. " + , + "default":"$id.lc_extrap.txt" + } + + + , + "bamstat_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics", + "help_text": "Type: `file`, default: `$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics" + , + "default":"$id.mapping_quality.txt" + } + + + , + "strandedness_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.strandedness.txt`. Path to output report (txt) of inferred strandedness", + "help_text": "Type: `file`, default: `$id.strandedness.txt`. Path to output report (txt) of inferred strandedness" + , + "default":"$id.strandedness.txt" + } + + + , + "inner_dist_output_stats": { + "type": + "string", + "description": "Type: `file`, default: `$id.inner_distance.stats`. output file (txt) with summary statistics of inner distances of paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance.stats`. output file (txt) with summary statistics of inner distances of paired reads" + , + "default":"$id.inner_distance.stats" + } + + + , + "inner_dist_output_dist": { + "type": + "string", + "description": "Type: `file`, default: `$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads" + , + "default":"$id.inner_distance.txt" + } + + + , + "inner_dist_output_freq": { + "type": + "string", + "description": "Type: `file`, default: `$id.inner_distance_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads" + , + "default":"$id.inner_distance_freq.txt" + } + + + , + "inner_dist_output_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads" + , + "default":"$id.inner_distance_plot.pdf" + } + + + , + "inner_dist_output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads" + , + "default":"$id.inner_distance_plot.r" + } + + + , + "junction_annotation_output_log": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script", + "help_text": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script" + , + "default":"$id.junction_annotation.log" + } + + + , + "junction_annotation_output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot", + "help_text": "Type: `file`, default: `$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot" + , + "default":"$id.junction_annotation_plot.r" + } + + + , + "junction_annotation_output_junction_bed": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.bed`. junction annotation file (bed format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.bed`. junction annotation file (bed format)" + , + "default":"$id.junction_annotation.bed" + } + + + , + "junction_annotation_output_junction_interact": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions", + "help_text": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." + , + "default":"$id.junction_annotation.Interact.bed" + } + + + , + "junction_annotation_output_junction_sheet": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_annotation.xls`. junction annotation file (xls format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.xls`. junction annotation file (xls format)" + , + "default":"$id.junction_annotation.xls" + } + + + , + "junction_annotation_output_splice_events_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.splice_events.pdf`. plot of splice events (pdf)", + "help_text": "Type: `file`, default: `$id.splice_events.pdf`. plot of splice events (pdf)" + , + "default":"$id.splice_events.pdf" + } + + + , + "junction_annotation_output_splice_junctions_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)", + "help_text": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)" + , + "default":"$id.splice_junctions_plot.pdf" + } + + + , + "junction_saturation_output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot" + , + "default":"$id.junction_saturation_plot.r" + } + + + , + "junction_saturation_output_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf" + , + "default":"$id.junction_saturation_plot.pdf" + } + + + , + "read_distribution_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis", + "help_text": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis." + , + "default":"$id.read_distribution.txt" + } + + + , + "read_duplication_output_duplication_rate_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot" + , + "default":"$id.duplication_rate_plot.r" + } + + + , + "read_duplication_output_duplication_rate_plot": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)" + , + "default":"$id.duplication_rate_plot.pdf" + } + + + , + "read_duplication_output_duplication_rate_mapping": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication" + , + "default":"$id.duplication_rate_mapping.xls" + } + + + , + "read_duplication_output_duplication_rate_sequence": { + "type": + "string", + "description": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication" + , + "default":"$id.duplication_rate_sequencing.xls" + } + + + , + "tin_output_summary": { + "type": + "string", + "description": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics", + "help_text": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics" + , + "default":"$id.tin_summary.txt" + } + + + , + "tin_output_metrics": { + "type": + "string", + "description": "Type: `file`, default: `$id.tin.xls`. file with TIN metrics (xls)", + "help_text": "Type: `file`, default: `$id.tin.xls`. file with TIN metrics (xls)" + , + "default":"$id.tin.xls" + } + + + , + "dupradar_output_dupmatrix": { + "type": + "string", + "description": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts" + , + "default":"$id.dup_matrix.txt" + } + + + , + "dupradar_output_dup_intercept_mqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", + "help_text": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" + , + "default":"$id.dup_intercept_mqc.txt" + } + + + , + "dupradar_output_duprate_exp_boxplot": { + "type": + "string", + "description": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", + "help_text": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot" + , + "default":"$id.duprate_exp_boxplot.pdf" + } + + + , + "dupradar_output_duprate_exp_densplot": { + "type": + "string", + "description": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" + , + "default":"$id.duprate_exp_densityplot.pdf" + } + + + , + "dupradar_output_duprate_exp_denscurve_mqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc", + "help_text": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc" + , + "default":"$id.duprate_exp_density_curve_mqc.pdf" + } + + + , + "dupradar_output_expression_histogram": { + "type": + "string", + "description": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", + "help_text": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" + , + "default":"$id.expression_hist.pdf" + } + + + , + "dupradar_output_intercept_slope": { + "type": + "string", + "description": "Type: `file`, default: `$id.intercept_slope.txt`. ", + "help_text": "Type: `file`, default: `$id.intercept_slope.txt`. " + , + "default":"$id.intercept_slope.txt" + } + + + , + "qualimap_qc_report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`, example: `$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results", + "help_text": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`, example: `$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results." + , + "default":"$id.$key.qualimap_qc_report.txt" + } + + + , + "qualimap_counts": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.qualimap_counts`. Output file for computed counts", + "help_text": "Type: `file`, default: `$id.$key.qualimap_counts`. Output file for computed counts." + , + "default":"$id.$key.qualimap_counts" + } + + + , + "qualimap_report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.qualimap_report.html`, example: `$id.report.html`. Report output file", + "help_text": "Type: `file`, default: `$id.$key.qualimap_report.html`, example: `$id.report.html`. Report output file. Supported formats are PDF or HTML." + , + "default":"$id.$key.qualimap_report.html" + } + + + , + "deseq2_output": { + "type": + "string", + "description": "Type: `file`, default: `deseq2`. ", + "help_text": "Type: `file`, default: `deseq2`. " + , + "default":"deseq2" + } + + + , + "deseq2_output_pseudo": { + "type": + "string", + "description": "Type: `file`, default: `deseq2_pseudo`. ", + "help_text": "Type: `file`, default: `deseq2_pseudo`. " + , + "default":"deseq2_pseudo" + } + + + , + "multiqc_report": { + "type": + "string", + "description": "Type: `file`, default: `multiqc_report.html`. ", + "help_text": "Type: `file`, default: `multiqc_report.html`. " + , + "default":"multiqc_report.html" + } + + + , + "multiqc_data": { + "type": + "string", + "description": "Type: `file`, default: `multiqc_data`. ", + "help_text": "Type: `file`, default: `multiqc_data`. " + , + "default":"multiqc_data" + } + + + , + "multiqc_plots": { + "type": + "string", + "description": "Type: `file`, default: `multiqc_plots`. ", + "help_text": "Type: `file`, default: `multiqc_plots`. " + , + "default":"multiqc_plots" + } + + + , + "featurecounts": { + "type": + "string", + "description": "Type: `file`, default: `$id.featureCounts.txt`. ", + "help_text": "Type: `file`, default: `$id.featureCounts.txt`. " + , + "default":"$id.featureCounts.txt" + } + + + , + "featurecounts_summary": { + "type": + "string", + "description": "Type: `file`, default: `$id.featureCounts.txt.summary`. ", + "help_text": "Type: `file`, default: `$id.featureCounts.txt.summary`. " + , + "default":"$id.featureCounts.txt.summary" + } + + + , + "featurecounts_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.featureCounts_mqc.tsv`. ", + "help_text": "Type: `file`, default: `$id.featureCounts_mqc.tsv`. " + , + "default":"$id.featureCounts_mqc.tsv" + } + + + , + "featurecounts_rrna_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `$id.featureCounts_rrna_mqc.tsv`. ", + "help_text": "Type: `file`, default: `$id.featureCounts_rrna_mqc.tsv`. " + , + "default":"$id.featureCounts_rrna_mqc.tsv" + } + + + , + "tpm_gene": { + "type": + "string", + "description": "Type: `file`, default: `salmon.merged.gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_tpm.tsv`. " + , + "default":"salmon.merged.gene_tpm.tsv" + } + + + , + "counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `salmon.merged.gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_counts.tsv`. " + , + "default":"salmon.merged.gene_counts.tsv" + } + + + , + "counts_gene_length_scaled": { + "type": + "string", + "description": "Type: `file`, default: `salmon.merged.gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_counts_length_scaled.tsv`. " + , + "default":"salmon.merged.gene_counts_length_scaled.tsv" + } + + + , + "counts_gene_scaled": { + "type": + "string", + "description": "Type: `file`, default: `salmon.merged.gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_counts_scaled.tsv`. " + , + "default":"salmon.merged.gene_counts_scaled.tsv" + } + + + , + "tpm_transcript": { + "type": + "string", + "description": "Type: `file`, default: `salmon.merged.transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.transcript_tpm.tsv`. " + , + "default":"salmon.merged.transcript_tpm.tsv" + } + + + , + "counts_transcript": { + "type": + "string", + "description": "Type: `file`, default: `salmon.merged.transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.transcript_counts.tsv`. " + , + "default":"salmon.merged.transcript_counts.tsv" + } + + + , + "quant_merged_summarizedexperiment": { + "type": + "string", + "description": "Type: `file`, default: `salmon_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `salmon_merged_summarizedexperiment`. " + , + "default":"salmon_merged_summarizedexperiment" + } + + + , + "pseudo_tpm_gene": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_gene_tpm.tsv`. " + , + "default":"pseudo_gene_tpm.tsv" + } + + + , + "pseudo_counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_gene_counts.tsv`. " + , + "default":"pseudo_gene_counts.tsv" + } + + + , + "pseudo_counts_gene_length_scaled": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_gene_counts_length_scaled.tsv`. " + , + "default":"pseudo_gene_counts_length_scaled.tsv" + } + + + , + "pseudo_counts_gene_scaled": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_gene_counts_scaled.tsv`. " + , + "default":"pseudo_gene_counts_scaled.tsv" + } + + + , + "pseudo_tpm_transcript": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_transcript_tpm.tsv`. " + , + "default":"pseudo_transcript_tpm.tsv" + } + + + , + "pseudo_counts_transcript": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_transcript_counts.tsv`. " + , + "default":"pseudo_transcript_counts.tsv" + } + + + , + "pseudo_quant_merged_summarizedexperiment": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_quant_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `pseudo_quant_merged_summarizedexperiment`. " + , + "default":"pseudo_quant_merged_summarizedexperiment" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/workflows/rnaseq/.config.vsh.yaml b/target/nextflow/workflows/rnaseq/.config.vsh.yaml new file mode 100644 index 0000000..9b818af --- /dev/null +++ b/target/nextflow/workflows/rnaseq/.config.vsh.yaml @@ -0,0 +1,2102 @@ +name: "rnaseq" +namespace: "workflows" +version: "v0.3.0" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "ID of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Path to the sample (or read 1 of paired end sample)." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Path to read 2 of the sample." + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\ + \ or auto" + info: null + default: + - "auto" + required: false + choices: + - "unstranded" + - "forward" + - "reverse" + - "auto" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Reference genome options" + arguments: + - type: "file" + name: "--fasta" + description: "Path to FASTA genome file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gtf" + description: "Path to GTF annotation file. This parameter is *mandatory* if --genome\ + \ is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gff" + description: "Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--additional_fasta" + description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\ + \ sequences." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcript_fasta" + description: "Path to FASTA transcriptome file." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--gene_bed" + description: "Path to BED file containing gene intervals. This will be created\ + \ from the GTF file if not specified." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--splicesites" + description: "Splice sites file required for HISAT2." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_index" + description: "Path to directory or tar.gz archive for pre-built STAR index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_index" + description: "Path to directory or tar.gz archive for pre-built RSEM index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_index" + description: "Path to directory or tar.gz archive for pre-built Salmon index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--kallisto_index" + description: "Path to directory or tar.gz archive for pre-built Kallisto index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--gencode" + description: "Specify if the GTF annotation is in GENCODE format." + info: null + direction: "input" + - type: "string" + name: "--gtf_extra_attributes" + description: "Additional gene identifiers from the input GTF file when running\ + \ Salmon. More than one value can be specified separated by comma." + info: null + default: + - "gene_name" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--gtf_group_features" + description: "Define the attribute type used to group features in the GTF file\ + \ when running Salmon." + info: null + default: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_group_type" + description: "The attribute type used to group feature types in the GTF file when\ + \ generating the biotype plot with featureCounts." + info: null + default: + - "gene_biotype" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--featurecounts_feature_type" + description: "By default, the pipeline assigns reads based on the 'exon' attribute\ + \ within the GTF file." + info: null + default: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--star_sjdb_gtf_feature_exon" + description: "Feature type in GTF file to be used as exons for building transcripts" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read trimming options" + arguments: + - type: "string" + name: "--trimmer" + description: "Specify the trimming tool to use." + info: null + default: + - "trimgalore" + required: false + choices: + - "trimgalore" + - "fastp" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_trimmed_reads" + description: "Minimum number of trimmed reads below which samples are removed\ + \ from further processing. Some downstream steps in the pipeline will fail if\ + \ this threshold is too low." + info: null + default: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read filtering options" + arguments: + - type: "file" + name: "--bbsplit_fasta_list" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--bbsplit_index" + description: "Path to directory or tar.gz archive for pre-built BBSplit index." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--remove_ribo_rna" + description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + info: null + direction: "input" + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + default: + - "src/assets/rrna-db-defaults.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "UMI options" + arguments: + - type: "boolean_true" + name: "--with_umi" + description: "Enable UMI-based read deduplication." + info: null + direction: "input" + - type: "string" + name: "--umitools_extract_method" + description: "UMI pattern to use." + info: null + default: + - "string" + required: false + choices: + - "string" + - "regex" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern" + description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\ + \ first 6 nucleotides of the read are from the UMI." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_bc_pattern2" + description: "The UMI barcode pattern to use if the UMI is located in read 2." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--umi_discard_read" + description: "After UMI barcode extraction discard either R1 or R2 by setting\ + \ this parameter to 1 or 2, respectively." + info: null + default: + - 0 + required: false + choices: + - 0 + - 1 + - 2 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_umi_separator" + description: "The character that separates the UMI in the read name. Most likely\ + \ a colon if you skipped the extraction with UMI-tools and used other software." + info: null + default: + - "_" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umitools_grouping_method" + description: "Method to use to determine read groups by subsuming those with similar\ + \ UMIs. All methods start by identifying the reads with the same mapping position,\ + \ but treat similar yet nonidentical UMIs differently." + info: null + default: + - "directional" + required: false + choices: + - "unique" + - "percentile" + - "cluster" + - "adjacency" + - "directional" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--umi_dedup_stats" + description: "Generate output stats when running \"umi_tools dedup\"." + info: null + direction: "input" +- name: "Alignment options" + arguments: + - type: "string" + name: "--aligner" + description: "Specifies the alignment algorithm to use - available options are\ + \ 'star_salmon', 'star_rsem' and 'hisat2'." + info: null + default: + - "star_salmon" + required: false + choices: + - "star_salmon" + - "star_rsem" + - "hisat2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--pseudo_aligner" + description: "Specifies the pseudo aligner to use - available options are 'salmon'.\ + \ Runs in addition to '--aligner'." + info: null + default: + - "salmon" + required: false + choices: + - "salmon" + - "kallisto" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--pseudo_aligner_kmer_size" + description: "Kmer length passed to indexing step of pseudoaligners." + info: null + default: + - 31 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length" + description: "For single-end mode only, the estimated average fragment length\ + \ to use for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--kallisto_quant_fragment_length_sd" + description: "For single-end mode only, the estimated standard deviation of the\ + \ fragment length for quantification with Kallisto." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bam_csi_index" + description: "Create a CSI index for BAM files instead of the traditional BAI\ + \ index. This will be required for genomes with larger chromosome sizes." + info: null + direction: "input" + - type: "string" + name: "--salmon_quant_libtype" + description: "Override Salmon library type inferred based on strandedness defined\ + \ in meta object." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_mapped_reads" + description: "Minimum percentage of uniquely mapped reads below which samples\ + \ are removed from further processing." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--stringtie_ignore_gtf" + description: "Perform reference-guided de novo assembly of transcripts using StringTie,\ + \ i.e. don't restrict to those in GTF file." + info: null + direction: "input" + - type: "string" + name: "--extra_stringtie_args" + description: "Extra arguments to pass to stringtie command in addition to defaults\ + \ defined by the pipeline." + info: null + default: + - "-v" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--save_unaligned" + description: "Where possible, save unaligned reads from either STAR, HISAT2 or\ + \ Salmon to the results directory." + info: null + direction: "input" + - type: "boolean_true" + name: "--save_align_intermeds" + description: "Save the intermediate BAM files from the alignment step." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_alignment" + description: "Skip all of the alignment-based processes within the pipeline." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_pseudo_alignment" + description: "Skip all of the pseudo-alignment-based processes within the pipeline." + info: null + direction: "input" +- name: "Process skipping options" + arguments: + - type: "boolean" + name: "--skip_fastqc" + description: "Skip FatQC step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_trimming" + description: "Skip the adapter trimming step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--skip_umi_extract" + description: "Skip umi_tools extract step." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--skip_qc" + description: "Skip all QC steps except for MultiQC." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_markduplicates" + description: "Skip picard MarkDuplicates step." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_stringtie" + description: "Skip StringTie." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_biotype_qc" + description: "Skip additional featureCounts process for biotype QC." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_bigwig" + description: "Skip bigWig file creation." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_preseq" + description: "Skip Preseq." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_dupradar" + description: "Skip dupRadar." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_qualimap" + description: "Skip Qualimap." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_rseqc" + description: "Skip RSeQC." + info: null + direction: "input" + - type: "boolean_true" + name: "--skip_multiqc" + description: "Skip MultiQC." + info: null + direction: "input" +- name: "Other process arguments" + arguments: + - type: "string" + name: "--extra_picard_args" + description: "Extra arguments to pass to picard MarkDuplicates command in addition\ + \ to defaults defined by the pipeline." + info: null + default: + - " --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT\ + \ --TMP_DIR tmp" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_preseq_args" + description: "Extra arguments to pass to preseq lc_extrap command in addition\ + \ to defaults defined by the pipeline" + info: null + default: + - "-verbose -seed 1 -seg_len 100000000" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--deseq2_vst" + description: "Use vst transformation instead of rlog with DESeq2" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--rseqc_modules" + description: "Specify the RSeQC modules to run_wf (comma-separated list)" + info: null + default: + - "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + required: false + choices: + - "bam_stat" + - "inner_distance" + - "infer_experiment" + - "junction_annotation" + - "junction_saturation" + - "read_distribution" + - "read_duplication" + - "tin" + direction: "input" + multiple: true + multiple_sep: ";" +- name: "MultiQC paramenters" + arguments: + - type: "file" + name: "--multiqc_custom_config" + description: "Custom multiqc configuration file\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--multiqc_title" + description: "Custom multiqc title\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_methods_description" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_fasta" + info: null + default: + - "reference/genome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_gtf" + info: null + default: + - "reference/gene_annotation.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_transcript_fasta" + info: null + default: + - "reference/transcriptome.fasta" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_gene_bed" + info: null + default: + - "reference/gene_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_star_index" + description: "Path to STAR index." + info: null + default: + - "reference/index/STAR" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_salmon_index" + description: "Path to Salmon index." + info: null + default: + - "reference/index/Salmon" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_bbsplit_index" + description: "Path to BBSplit index." + info: null + default: + - "reference/index/BBSplit" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_kallisto_index" + description: "Path to Kallisto index." + info: null + default: + - "reference/index/Kallisto" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_fastq_1" + description: "Path to output directory" + info: null + default: + - "fastq/${id}_r1.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_fastq_2" + description: "Path to output directory" + info: null + default: + - "fastq/${id}_r2.fastq.gz" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_1" + description: "FastQC HTML report for read 1." + info: null + default: + - "fastqc_raw/${id}_r1.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_html_2" + description: "FastQC HTML report for read 2." + info: null + default: + - "fastqc_raw/${id}_r2.fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_1" + description: "FastQC report archive for read 1." + info: null + default: + - "fastqc_raw/${id}_r1.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_zip_2" + description: "FastQC report archive for read 2." + info: null + default: + - "fastqc_raw/${id}_r2.fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_1" + info: null + default: + - "fastqc_trim/${id}_r1.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_html_2" + info: null + default: + - "fastqc_trim/${id}_r2.trimmed_fastqc.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_1" + info: null + default: + - "fastqc_trim/${id}_r1.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_zip_2" + info: null + default: + - "fastqc_trim/${id}_r2.trimmed_fastqc.zip" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_1" + info: null + default: + - "trimgalore/${id}_r1.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--trim_log_2" + info: null + default: + - "trimgalore/${id}_r2.trimming_report.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastp_trim_json" + description: "The fastp json format report file name" + info: null + default: + - "fastp/$id_out.json" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastp_trim_html" + description: "The fastp html format report file name" + info: null + default: + - "fastp/$id_out.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sortmerna_log" + description: "Sortmerna log file." + info: null + default: + - "sortmerna/$id.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_alignment" + info: null + default: + - "STAR/$id" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_sorted" + info: null + default: + - "STAR/genome_processed/$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_index" + info: null + default: + - "STAR/genome_processed/$id.genome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam" + info: null + default: + - "STAR/transcriptome_processed/$id.transcriptome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_index" + info: null + default: + - "STAR/transcriptome_processed/$id.transcriptome.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--star_log" + info: null + default: + - "STAR/log/$id.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_stats" + info: null + default: + - "samtools_stats/$id.genome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_flagstat" + info: null + default: + - "samtools_stats/$id.genome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome_bam_idxstats" + info: null + default: + - "samtools_stats/$id.genome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_stats" + info: null + default: + - "samtools_stats/$id.transcriptome.stats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_flagstat" + info: null + default: + - "samtools_stats/$id.transcriptome.flagstat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--transcriptome_bam_idxstats" + info: null + default: + - "samtools_stats/$id.transcriptome.idxstats" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_results" + info: null + default: + - "STAR_Salmon/$id" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--salmon_quant_results_file" + info: null + default: + - "STAR_Salmon/$id/quant.sf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_results" + info: null + default: + - "Pseudo_align_quant/$id" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_gene" + description: "Expression counts on gene level" + info: null + default: + - "RSEM/$id.genes.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--rsem_counts_transcripts" + description: "Expression counts on transcript level" + info: null + default: + - "RSEM/$id.isoforms.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_star_rsem" + description: "BAM file generated by STAR (from RSEM)" + info: null + default: + - "RSEM/$id.STAR.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_genome_rsem" + description: "Genome BAM file (from RSEM)" + info: null + default: + - "RSEM/$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_transcript_rsem" + description: "Transcript BAM file (from RSEM)" + info: null + default: + - "RSEM/$id.transcript.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_gene" + info: null + default: + - "transcript_quantification/gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene" + info: null + default: + - "transcript_quantification/gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_length_scaled" + info: null + default: + - "transcript_quantification/gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_gene_scaled" + info: null + default: + - "transcript_quantification/gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tpm_transcript" + info: null + default: + - "transcript_quantification/transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcript" + info: null + default: + - "transcript_quantification/transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--quant_merged_summarizedexperiment" + info: null + default: + - "transcript_quantification/summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--markduplicates_metrics" + info: null + default: + - "picard/$id.MarkDuplicates.metrics.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_transcript_gtf" + info: null + default: + - "stringtie/$id.transcripts.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_coverage_gtf" + info: null + default: + - "stringtie/$id.coverage.gtf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_abundance" + info: null + default: + - "stringtie/$id.gene_abundance.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stringtie_ballgown" + info: null + default: + - "stringtie/$id.ballgown" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts" + info: null + default: + - "featurecounts/$id.featureCounts.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_summary" + info: null + default: + - "featurecounts/$id.featureCounts.txt.summary" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_multiqc" + info: null + default: + - "featurecounts/$id.featureCounts_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--featurecounts_rrna_multiqc" + info: null + default: + - "featurecounts/$id.featureCounts_rrna_mqc.tsv" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_forward" + info: null + default: + - "bedgraph/$id.forward.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bedgraph_reverse" + info: null + default: + - "bedgraph/$id.reverse.bedgraph" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_forward" + info: null + default: + - "bigwig/$id.forward.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bigwig_reverse" + info: null + default: + - "bigwig/$id.reverse.bigwig" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--preseq_output" + info: null + default: + - "preseq/$id.lc_extrap.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bamstat_output" + description: "Path to output file (txt) of mapping quality statistics" + info: null + default: + - "RSeQC/bamstat/$id.mapping_quality.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--strandedness_output" + description: "Path to output report (txt) of inferred strandedness" + info: null + default: + - "RSeQC/inferexperiment/$id.strandedness.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_stats" + description: "output file (txt) with summary statistics of inner distances of\ + \ paired reads" + info: null + default: + - "RSeQC/innerdistance/$id.inner_distance.stats" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_dist" + description: "output file (txt) with inner distances of all paired reads" + info: null + default: + - "RSeQC/innerdistance/txt/$id.inner_distance.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_freq" + description: "output file (txt) with frequencies of inner distances of all paired\ + \ reads" + info: null + default: + - "RSeQC/innerdistance/txt/$id.inner_distance_freq.txt" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot" + description: "output file (pdf) with histogram plot of of inner distances of all\ + \ paired reads" + info: null + default: + - "RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--inner_dist_output_plot_r" + description: "output file (R) with script of histogram plot of of inner distances\ + \ of all paired reads" + info: null + default: + - "RSeQC/innerdistance/rscript/$id.inner_distance_plot.r" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_log" + description: "output log of junction annotation script" + info: null + default: + - "RSeQC/junctionannotation/log/$id.junction_annotation.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_plot_r" + description: "R script to generate splice_junction and splice_events plot" + info: null + default: + - "RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_bed" + description: "junction annotation file (bed format)" + info: null + default: + - "RSeQC/junctionannotation/bed/$id.junction_annotation.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_interact" + description: "interact file (bed format) of junctions. Can be uploaded to UCSC\ + \ genome browser or converted to bigInteract (using bedToBigBed program) for\ + \ visualization." + info: null + default: + - "RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_junction_sheet" + description: "junction annotation file (xls format)" + info: null + default: + - "RSeQC/junctionannotation/xls/$id.junction_annotation.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_events_plot" + description: "plot of splice events (pdf)" + info: null + default: + - "RSeQC/junctionannotation/pdf/$id.splice_events.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_annotation_output_splice_junctions_plot" + description: "plot of junctions (pdf)" + info: null + default: + - "RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot_r" + description: "r script to generate junction_saturation_plot plot" + info: null + default: + - "RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--junction_saturation_output_plot" + description: "plot of junction saturation (pdf" + info: null + default: + - "RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_distribution_output" + description: "output file (txt) of read distribution analysis." + info: null + default: + - "RSeQC/readdistribution/$id.read_distribution.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot_r" + description: "R script for generating duplication rate plot" + info: null + default: + - "RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_plot" + description: "duplication rate plot (pdf)" + info: null + default: + - "RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_mapping" + description: "Summary of mapping-based read duplication" + info: null + default: + - "RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--read_duplication_output_duplication_rate_sequence" + description: "Summary of sequencing-based read duplication" + info: null + default: + - "RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_summary" + description: "summary statistics (txt) of calculated TIN metrics" + info: null + default: + - "RSeQC/tin/txt/$id.tin_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--tin_output_metrics" + description: "file with TIN metrics (xls)" + info: null + default: + - "RSeQC/tin/xls/$id.tin.xls" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dupmatrix" + description: "path to output file (txt) of duplicate tag counts" + info: null + default: + - "dupradar/gene_data/$id.dup_matrix.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_dup_intercept_mqc" + description: "path to output file (txt) of multiqc intercept value DupRadar" + info: null + default: + - "dupradar/mqc_intercept/$id.dup_intercept_mqc.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_boxplot" + description: "path to output file (pdf) of distribution of expression box plot" + info: null + default: + - "dupradar/box_plot/$id.duprate_exp_boxplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_densplot" + description: "path to output file (pdf) of 2D density scatter plot of duplicate\ + \ tag counts" + info: null + default: + - "dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_duprate_exp_denscurve_mqc" + description: "path to output file (pdf) of density curve of gene duplication multiqc" + info: null + default: + - "dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_expression_histogram" + description: "path to output file (pdf) of distribution of RPK values per gene\ + \ histogram" + info: null + default: + - "dupradar/histogram/$id.expression_hist.pdf" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--dupradar_output_intercept_slope" + info: null + default: + - "dupradar/intercept_slope/$id.intercept_slope.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." + info: null + default: + - "Qualimap/$id.rnaseq_qc_results.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_counts" + description: "Output file for computed counts." + info: null + default: + - "Qualimap/$id.counts.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." + info: null + default: + - "Qualimap/$id.report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output" + info: null + default: + - "deseq2_qc" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--deseq2_output_pseudo" + info: null + default: + - "deseq2_qc_pseudo" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_report" + info: null + default: + - "multiqc/multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_data" + info: null + default: + - "multiqc/multiqc_data" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_plots" + info: null + default: + - "multiqc/multiqc_plots" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_versions" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene" + info: null + default: + - "pseudo_alignment_quantification/gene_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_length_scaled" + info: null + default: + - "pseudo_alignment_quantification/gene_counts_length_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_gene_scaled" + info: null + default: + - "pseudo_alignment_quantification/gene_counts_scaled.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_gene" + info: null + default: + - "pseudo_alignment_quantification/gene_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_tpm_transcript" + info: null + default: + - "pseudo_alignment_quantification/transcript_tpm.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_counts_transcript" + info: null + default: + - "pseudo_alignment_quantification/transcript_counts.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--pseudo_quant_merged_summarizedexperiment" + info: null + default: + - "pseudo_alignment_quantification/quant_merged_summarizedexperiment" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A viash workflow for the nf-core/rnaseq pipeline.\n" +test_resources: +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_wf" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "workflows/prepare_genome" + repository: + type: "local" +- name: "cat_fastq" + repository: + type: "local" +- name: "workflows/pre_processing" + repository: + type: "local" +- name: "workflows/genome_alignment_and_quant" + repository: + type: "local" +- name: "workflows/pseudo_alignment_and_quant" + repository: + type: "local" +- name: "workflows/post_processing" + repository: + type: "local" +- name: "workflows/quality_control" + repository: + type: "local" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/rnaseq/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: "target/nextflow/workflows/rnaseq" + executable: "target/nextflow/workflows/rnaseq/main.nf" + viash_version: "0.9.4" + git_commit: "e21130ff7a4a215d28ec1988730cbc95c0259076" + git_remote: "https://github.com/viash-hub/rnaseq" + dependencies: + - "target/nextflow/workflows/prepare_genome" + - "target/nextflow/cat_fastq" + - "target/nextflow/workflows/pre_processing" + - "target/nextflow/workflows/genome_alignment_and_quant" + - "target/nextflow/workflows/pseudo_alignment_and_quant" + - "target/nextflow/workflows/post_processing" + - "target/nextflow/workflows/quality_control" +package_config: + name: "rnaseq" + version: "v0.3.0" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "v0.3.1" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + organization: "vsh" diff --git a/target/nextflow/workflows/rnaseq/main.nf b/target/nextflow/workflows/rnaseq/main.nf new file mode 100644 index 0000000..eb2dcf7 --- /dev/null +++ b/target/nextflow/workflows/rnaseq/main.nf @@ -0,0 +1,6315 @@ +// rnaseq v0.3.0 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "rnaseq", + "namespace" : "workflows", + "version" : "v0.3.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "ID of the sample.", + "example" : [ + "foo" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_1", + "description" : "Path to the sample (or read 1 of paired end sample).", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_2", + "description" : "Path to read 2 of the sample.", + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strandedness", + "description" : "Sample strand-specificity. Must be one of unstranded, forward, reverse or auto", + "default" : [ + "auto" + ], + "required" : false, + "choices" : [ + "unstranded", + "forward", + "reverse", + "auto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Reference genome options", + "arguments" : [ + { + "type" : "file", + "name" : "--fasta", + "description" : "Path to FASTA genome file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gtf", + "description" : "Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gff", + "description" : "Path to GFF3 annotation file. Required if \\"--gtf\\" is not specified.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--additional_fasta", + "description" : "FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcript_fasta", + "description" : "Path to FASTA transcriptome file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--gene_bed", + "description" : "Path to BED file containing gene intervals. This will be created from the GTF file if not specified.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--splicesites", + "description" : "Splice sites file required for HISAT2.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_index", + "description" : "Path to directory or tar.gz archive for pre-built STAR index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_index", + "description" : "Path to directory or tar.gz archive for pre-built RSEM index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_index", + "description" : "Path to directory or tar.gz archive for pre-built Salmon index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--kallisto_index", + "description" : "Path to directory or tar.gz archive for pre-built Kallisto index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--gencode", + "description" : "Specify if the GTF annotation is in GENCODE format.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--gtf_extra_attributes", + "description" : "Additional gene identifiers from the input GTF file when running Salmon. More than one value can be specified separated by comma.", + "default" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gtf_group_features", + "description" : "Define the attribute type used to group features in the GTF file when running Salmon.", + "default" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--featurecounts_group_type", + "description" : "The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts.", + "default" : [ + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--featurecounts_feature_type", + "description" : "By default, the pipeline assigns reads based on the 'exon' attribute within the GTF file.", + "default" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--star_sjdb_gtf_feature_exon", + "description" : "Feature type in GTF file to be used as exons for building transcripts", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read trimming options", + "arguments" : [ + { + "type" : "string", + "name" : "--trimmer", + "description" : "Specify the trimming tool to use.", + "default" : [ + "trimgalore" + ], + "required" : false, + "choices" : [ + "trimgalore", + "fastp" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_trimmed_reads", + "description" : "Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.", + "default" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read filtering options", + "arguments" : [ + { + "type" : "file", + "name" : "--bbsplit_fasta_list", + "description" : "List of reference genomes (separated by \\";\\") to filter reads against with BBSplit.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bbsplit_index", + "description" : "Path to directory or tar.gz archive for pre-built BBSplit index.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--remove_ribo_rna", + "description" : "Enable the removal of reads derived from ribosomal RNA using SortMeRNA.", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--ribo_database_manifest", + "description" : "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", + "default" : [ + "src/assets/rrna-db-defaults.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "UMI options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--with_umi", + "description" : "Enable UMI-based read deduplication.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--umitools_extract_method", + "description" : "UMI pattern to use.", + "default" : [ + "string" + ], + "required" : false, + "choices" : [ + "string", + "regex" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_bc_pattern", + "description" : "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_bc_pattern2", + "description" : "The UMI barcode pattern to use if the UMI is located in read 2.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--umi_discard_read", + "description" : "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.", + "default" : [ + 0 + ], + "required" : false, + "choices" : [ + 0, + 1, + 2 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_umi_separator", + "description" : "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software.", + "default" : [ + "_" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umitools_grouping_method", + "description" : "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", + "default" : [ + "directional" + ], + "required" : false, + "choices" : [ + "unique", + "percentile", + "cluster", + "adjacency", + "directional" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--umi_dedup_stats", + "description" : "Generate output stats when running \\"umi_tools dedup\\".", + "direction" : "input" + } + ] + }, + { + "name" : "Alignment options", + "arguments" : [ + { + "type" : "string", + "name" : "--aligner", + "description" : "Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.", + "default" : [ + "star_salmon" + ], + "required" : false, + "choices" : [ + "star_salmon", + "star_rsem", + "hisat2" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--pseudo_aligner", + "description" : "Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'.", + "default" : [ + "salmon" + ], + "required" : false, + "choices" : [ + "salmon", + "kallisto" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--pseudo_aligner_kmer_size", + "description" : "Kmer length passed to indexing step of pseudoaligners.", + "default" : [ + 31 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--kallisto_quant_fragment_length", + "description" : "For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--kallisto_quant_fragment_length_sd", + "description" : "For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--bam_csi_index", + "description" : "Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--salmon_quant_libtype", + "description" : "Override Salmon library type inferred based on strandedness defined in meta object.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_mapped_reads", + "description" : "Minimum percentage of uniquely mapped reads below which samples are removed from further processing.", + "default" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--stringtie_ignore_gtf", + "description" : "Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--extra_stringtie_args", + "description" : "Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline.", + "default" : [ + "-v" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--save_unaligned", + "description" : "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--save_align_intermeds", + "description" : "Save the intermediate BAM files from the alignment step.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_alignment", + "description" : "Skip all of the alignment-based processes within the pipeline.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_pseudo_alignment", + "description" : "Skip all of the pseudo-alignment-based processes within the pipeline.", + "direction" : "input" + } + ] + }, + { + "name" : "Process skipping options", + "arguments" : [ + { + "type" : "boolean", + "name" : "--skip_fastqc", + "description" : "Skip FatQC step.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_trimming", + "description" : "Skip the adapter trimming step.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--skip_umi_extract", + "description" : "Skip umi_tools extract step.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--skip_qc", + "description" : "Skip all QC steps except for MultiQC.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_markduplicates", + "description" : "Skip picard MarkDuplicates step.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_stringtie", + "description" : "Skip StringTie.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_biotype_qc", + "description" : "Skip additional featureCounts process for biotype QC.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_bigwig", + "description" : "Skip bigWig file creation.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_preseq", + "description" : "Skip Preseq.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_dupradar", + "description" : "Skip dupRadar.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_qualimap", + "description" : "Skip Qualimap.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_rseqc", + "description" : "Skip RSeQC.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--skip_multiqc", + "description" : "Skip MultiQC.", + "direction" : "input" + } + ] + }, + { + "name" : "Other process arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--extra_picard_args", + "description" : "Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline.", + "default" : [ + " --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extra_preseq_args", + "description" : "Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline", + "default" : [ + "-verbose -seed 1 -seg_len 100000000" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--deseq2_vst", + "description" : "Use vst transformation instead of rlog with DESeq2", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--rseqc_modules", + "description" : "Specify the RSeQC modules to run_wf (comma-separated list)", + "default" : [ + "bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + ], + "required" : false, + "choices" : [ + "bam_stat", + "inner_distance", + "infer_experiment", + "junction_annotation", + "junction_saturation", + "read_distribution", + "read_duplication", + "tin" + ], + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "MultiQC paramenters", + "arguments" : [ + { + "type" : "file", + "name" : "--multiqc_custom_config", + "description" : "Custom multiqc configuration file\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--multiqc_title", + "description" : "Custom multiqc title\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_methods_description", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_fasta", + "default" : [ + "reference/genome.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_gtf", + "default" : [ + "reference/gene_annotation.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_transcript_fasta", + "default" : [ + "reference/transcriptome.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_gene_bed", + "default" : [ + "reference/gene_annotation.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_star_index", + "description" : "Path to STAR index.", + "default" : [ + "reference/index/STAR" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_salmon_index", + "description" : "Path to Salmon index.", + "default" : [ + "reference/index/Salmon" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_bbsplit_index", + "description" : "Path to BBSplit index.", + "default" : [ + "reference/index/BBSplit" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_kallisto_index", + "description" : "Path to Kallisto index.", + "default" : [ + "reference/index/Kallisto" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_fastq_1", + "description" : "Path to output directory", + "default" : [ + "fastq/${id}_r1.fastq.gz" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_fastq_2", + "description" : "Path to output directory", + "default" : [ + "fastq/${id}_r2.fastq.gz" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_html_1", + "description" : "FastQC HTML report for read 1.", + "default" : [ + "fastqc_raw/${id}_r1.fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_html_2", + "description" : "FastQC HTML report for read 2.", + "default" : [ + "fastqc_raw/${id}_r2.fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_zip_1", + "description" : "FastQC report archive for read 1.", + "default" : [ + "fastqc_raw/${id}_r1.fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_zip_2", + "description" : "FastQC report archive for read 2.", + "default" : [ + "fastqc_raw/${id}_r2.fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_html_1", + "default" : [ + "fastqc_trim/${id}_r1.trimmed_fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_html_2", + "default" : [ + "fastqc_trim/${id}_r2.trimmed_fastqc.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_zip_1", + "default" : [ + "fastqc_trim/${id}_r1.trimmed_fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_zip_2", + "default" : [ + "fastqc_trim/${id}_r2.trimmed_fastqc.zip" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_log_1", + "default" : [ + "trimgalore/${id}_r1.trimming_report.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--trim_log_2", + "default" : [ + "trimgalore/${id}_r2.trimming_report.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastp_trim_json", + "description" : "The fastp json format report file name", + "default" : [ + "fastp/$id_out.json" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastp_trim_html", + "description" : "The fastp html format report file name", + "default" : [ + "fastp/$id_out.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sortmerna_log", + "description" : "Sortmerna log file.", + "default" : [ + "sortmerna/$id.log" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_alignment", + "default" : [ + "STAR/$id" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_sorted", + "default" : [ + "STAR/genome_processed/$id.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_index", + "default" : [ + "STAR/genome_processed/$id.genome.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam", + "default" : [ + "STAR/transcriptome_processed/$id.transcriptome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_index", + "default" : [ + "STAR/transcriptome_processed/$id.transcriptome.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--star_log", + "default" : [ + "STAR/log/$id.log" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_stats", + "default" : [ + "samtools_stats/$id.genome.stats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_flagstat", + "default" : [ + "samtools_stats/$id.genome.flagstat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome_bam_idxstats", + "default" : [ + "samtools_stats/$id.genome.idxstats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_stats", + "default" : [ + "samtools_stats/$id.transcriptome.stats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_flagstat", + "default" : [ + "samtools_stats/$id.transcriptome.flagstat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--transcriptome_bam_idxstats", + "default" : [ + "samtools_stats/$id.transcriptome.idxstats" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_quant_results", + "default" : [ + "STAR_Salmon/$id" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--salmon_quant_results_file", + "default" : [ + "STAR_Salmon/$id/quant.sf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_quant_results", + "default" : [ + "Pseudo_align_quant/$id" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_counts_gene", + "description" : "Expression counts on gene level", + "default" : [ + "RSEM/$id.genes.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--rsem_counts_transcripts", + "description" : "Expression counts on transcript level", + "default" : [ + "RSEM/$id.isoforms.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_star_rsem", + "description" : "BAM file generated by STAR (from RSEM)", + "default" : [ + "RSEM/$id.STAR.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_genome_rsem", + "description" : "Genome BAM file (from RSEM)", + "default" : [ + "RSEM/$id.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_transcript_rsem", + "description" : "Transcript BAM file (from RSEM)", + "default" : [ + "RSEM/$id.transcript.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tpm_gene", + "default" : [ + "transcript_quantification/gene_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene", + "default" : [ + "transcript_quantification/gene_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_length_scaled", + "default" : [ + "transcript_quantification/gene_counts_length_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_gene_scaled", + "default" : [ + "transcript_quantification/gene_counts_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tpm_transcript", + "default" : [ + "transcript_quantification/transcript_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcript", + "default" : [ + "transcript_quantification/transcript_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--quant_merged_summarizedexperiment", + "default" : [ + "transcript_quantification/summarizedexperiment" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--markduplicates_metrics", + "default" : [ + "picard/$id.MarkDuplicates.metrics.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_transcript_gtf", + "default" : [ + "stringtie/$id.transcripts.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_coverage_gtf", + "default" : [ + "stringtie/$id.coverage.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_abundance", + "default" : [ + "stringtie/$id.gene_abundance.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stringtie_ballgown", + "default" : [ + "stringtie/$id.ballgown" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts", + "default" : [ + "featurecounts/$id.featureCounts.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts_summary", + "default" : [ + "featurecounts/$id.featureCounts.txt.summary" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts_multiqc", + "default" : [ + "featurecounts/$id.featureCounts_mqc.tsv" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--featurecounts_rrna_multiqc", + "default" : [ + "featurecounts/$id.featureCounts_rrna_mqc.tsv" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bedgraph_forward", + "default" : [ + "bedgraph/$id.forward.bedgraph" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bedgraph_reverse", + "default" : [ + "bedgraph/$id.reverse.bedgraph" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bigwig_forward", + "default" : [ + "bigwig/$id.forward.bigwig" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bigwig_reverse", + "default" : [ + "bigwig/$id.reverse.bigwig" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--preseq_output", + "default" : [ + "preseq/$id.lc_extrap.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bamstat_output", + "description" : "Path to output file (txt) of mapping quality statistics", + "default" : [ + "RSeQC/bamstat/$id.mapping_quality.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--strandedness_output", + "description" : "Path to output report (txt) of inferred strandedness", + "default" : [ + "RSeQC/inferexperiment/$id.strandedness.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_stats", + "description" : "output file (txt) with summary statistics of inner distances of paired reads", + "default" : [ + "RSeQC/innerdistance/$id.inner_distance.stats" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_dist", + "description" : "output file (txt) with inner distances of all paired reads", + "default" : [ + "RSeQC/innerdistance/txt/$id.inner_distance.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_freq", + "description" : "output file (txt) with frequencies of inner distances of all paired reads", + "default" : [ + "RSeQC/innerdistance/txt/$id.inner_distance_freq.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_plot", + "description" : "output file (pdf) with histogram plot of of inner distances of all paired reads", + "default" : [ + "RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--inner_dist_output_plot_r", + "description" : "output file (R) with script of histogram plot of of inner distances of all paired reads", + "default" : [ + "RSeQC/innerdistance/rscript/$id.inner_distance_plot.r" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_log", + "description" : "output log of junction annotation script", + "default" : [ + "RSeQC/junctionannotation/log/$id.junction_annotation.log" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_plot_r", + "description" : "R script to generate splice_junction and splice_events plot", + "default" : [ + "RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_junction_bed", + "description" : "junction annotation file (bed format)", + "default" : [ + "RSeQC/junctionannotation/bed/$id.junction_annotation.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_junction_interact", + "description" : "interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization.", + "default" : [ + "RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_junction_sheet", + "description" : "junction annotation file (xls format)", + "default" : [ + "RSeQC/junctionannotation/xls/$id.junction_annotation.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_splice_events_plot", + "description" : "plot of splice events (pdf)", + "default" : [ + "RSeQC/junctionannotation/pdf/$id.splice_events.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_annotation_output_splice_junctions_plot", + "description" : "plot of junctions (pdf)", + "default" : [ + "RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_saturation_output_plot_r", + "description" : "r script to generate junction_saturation_plot plot", + "default" : [ + "RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--junction_saturation_output_plot", + "description" : "plot of junction saturation (pdf", + "default" : [ + "RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_distribution_output", + "description" : "output file (txt) of read distribution analysis.", + "default" : [ + "RSeQC/readdistribution/$id.read_distribution.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_plot_r", + "description" : "R script for generating duplication rate plot", + "default" : [ + "RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_plot", + "description" : "duplication rate plot (pdf)", + "default" : [ + "RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_mapping", + "description" : "Summary of mapping-based read duplication", + "default" : [ + "RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--read_duplication_output_duplication_rate_sequence", + "description" : "Summary of sequencing-based read duplication", + "default" : [ + "RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tin_output_summary", + "description" : "summary statistics (txt) of calculated TIN metrics", + "default" : [ + "RSeQC/tin/txt/$id.tin_summary.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--tin_output_metrics", + "description" : "file with TIN metrics (xls)", + "default" : [ + "RSeQC/tin/xls/$id.tin.xls" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_dupmatrix", + "description" : "path to output file (txt) of duplicate tag counts", + "default" : [ + "dupradar/gene_data/$id.dup_matrix.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_dup_intercept_mqc", + "description" : "path to output file (txt) of multiqc intercept value DupRadar", + "default" : [ + "dupradar/mqc_intercept/$id.dup_intercept_mqc.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_duprate_exp_boxplot", + "description" : "path to output file (pdf) of distribution of expression box plot", + "default" : [ + "dupradar/box_plot/$id.duprate_exp_boxplot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_duprate_exp_densplot", + "description" : "path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "default" : [ + "dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_duprate_exp_denscurve_mqc", + "description" : "path to output file (pdf) of density curve of gene duplication multiqc", + "default" : [ + "dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_expression_histogram", + "description" : "path to output file (pdf) of distribution of RPK values per gene histogram", + "default" : [ + "dupradar/histogram/$id.expression_hist.pdf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--dupradar_output_intercept_slope", + "default" : [ + "dupradar/intercept_slope/$id.intercept_slope.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_qc_report", + "description" : "Text file containing the RNAseq QC results.", + "default" : [ + "Qualimap/$id.rnaseq_qc_results.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_counts", + "description" : "Output file for computed counts.", + "default" : [ + "Qualimap/$id.counts.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_report", + "description" : "Report output file. Supported formats are PDF or HTML.", + "default" : [ + "Qualimap/$id.report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--deseq2_output", + "default" : [ + "deseq2_qc" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--deseq2_output_pseudo", + "default" : [ + "deseq2_qc_pseudo" + ], + "must_exist" : true, + "create_parent" ''' + ''': true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_report", + "default" : [ + "multiqc/multiqc_report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_data", + "default" : [ + "multiqc/multiqc_data" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_plots", + "default" : [ + "multiqc/multiqc_plots" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_versions", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_gene", + "default" : [ + "pseudo_alignment_quantification/gene_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_gene_length_scaled", + "default" : [ + "pseudo_alignment_quantification/gene_counts_length_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_gene_scaled", + "default" : [ + "pseudo_alignment_quantification/gene_counts_scaled.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_tpm_gene", + "default" : [ + "pseudo_alignment_quantification/gene_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_tpm_transcript", + "default" : [ + "pseudo_alignment_quantification/transcript_tpm.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_counts_transcript", + "default" : [ + "pseudo_alignment_quantification/transcript_counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--pseudo_quant_merged_summarizedexperiment", + "default" : [ + "pseudo_alignment_quantification/quant_merged_summarizedexperiment" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A viash workflow for the nf-core/rnaseq pipeline.\n", + "test_resources" : [ + { + "type" : "nextflow_script", + "path" : "test.nf", + "is_executable" : true, + "entrypoint" : "test_wf" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "workflows/prepare_genome", + "repository" : { + "type" : "local" + } + }, + { + "name" : "cat_fastq", + "repository" : { + "type" : "local" + } + }, + { + "name" : "workflows/pre_processing", + "repository" : { + "type" : "local" + } + }, + { + "name" : "workflows/genome_alignment_and_quant", + "repository" : { + "type" : "local" + } + }, + { + "name" : "workflows/pseudo_alignment_and_quant", + "repository" : { + "type" : "local" + } + }, + { + "name" : "workflows/post_processing", + "repository" : { + "type" : "local" + } + }, + { + "name" : "workflows/quality_control", + "repository" : { + "type" : "local" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/rnaseq/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "target/nextflow/workflows/rnaseq", + "viash_version" : "0.9.4", + "git_commit" : "e21130ff7a4a215d28ec1988730cbc95c0259076", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "v0.3.0", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "v0.3.1" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.0'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { prepare_genome } from "${meta.resources_dir}/../../../nextflow/workflows/prepare_genome/main.nf" +include { cat_fastq } from "${meta.resources_dir}/../../../nextflow/cat_fastq/main.nf" +include { pre_processing } from "${meta.resources_dir}/../../../nextflow/workflows/pre_processing/main.nf" +include { genome_alignment_and_quant } from "${meta.resources_dir}/../../../nextflow/workflows/genome_alignment_and_quant/main.nf" +include { pseudo_alignment_and_quant } from "${meta.resources_dir}/../../../nextflow/workflows/pseudo_alignment_and_quant/main.nf" +include { post_processing } from "${meta.resources_dir}/../../../nextflow/workflows/post_processing/main.nf" +include { quality_control } from "${meta.resources_dir}/../../../nextflow/workflows/quality_control/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + + reference_ch = input_ch + + | map { id, state -> + def biotype = state.gencode ? "gene_type" : state.featurecounts_group_type + def filter_gtf = + ( + ( !state.skip_alignment && state.aligner) // Condition 1: Alignment is required and aligner is set + || ( !state.skip_pseudo_alignment && state.pseudo_aligner ) // Condition 2: Pseudoalignment is required and pseudoaligner is set + || ( !state.transcript_fasta ) // Condition 3: Transcript FASTA file is not provided + ) + && + ( !state.skip_gtf_filter ) // Condition 4: --skip_gtf_filter is not provided + [ id, state + [ biotype: biotype, filter_gtf: filter_gtf ] ] + } + + | toSortedList + + | map { list -> + [ "ref", [ + fasta: list.collect { id, state -> state.fasta }.unique()[0], + gtf: list.collect { id, state -> state.gtf }.unique()[0], + gff: list.collect { id, state -> state.gff }.unique()[0], + additional_fasta: list.collect { id, state -> state.additional_fasta }.unique()[0], + transcript_fasta:list.collect { id, state -> state.transcript_fasta }.unique()[0], + gene_bed: list.collect { id, state -> state.gene_bed }.unique()[0], + bbsplit_fasta_list: list.collect { id, state -> state.bbsplit_fasta_list }.unique()[0], + aligner: list.collect { id, state -> state.aligner }.unique()[0], + pseudo_aligner: list.collect { id, state -> state.pseudo_aligner }.unique()[0], + star_index: list.collect { id, state -> state.star_index }.unique()[0], + rsem_index: list.collect { id, state -> state.rsem_index }.unique()[0], + salmon_index: list.collect { id, state -> state.salmon_index }.unique()[0], + kallisto_index: list.collect { id, state -> state.kallisto_index }.unique()[0], + // splicesites: list.collect { id, state -> state.splicesites }.unique()[0], + // hisat2_index: list.collect { id, state -> state.hisat2_index }.unique()[0], + bbsplit_index: list.collect { id, state -> state.bbsplit_index }.unique()[0], + // See: + skip_bbsplit: true, // list.collect { id, state -> state.skip_bbsplit }.unique()[0], + skip_alignment: list.collect { id, state -> state.skip_alignment }.unique()[0], + gencode: list.collect { id, state -> state.gencode }.unique()[0], + biotype: list.collect { id, state -> state.biotype }.unique()[0], + star_sjdb_gtf_feature_exon: list.collect { id, state -> state.star_sjdb_gtf_feature_exon }.unique()[0], + filter_gtf: list.collect { id, state -> state.filter_gtf }.unique()[0], + pseudo_aligner_kmer_size: list.collect { id, state -> state.pseudo_aligner_kmer_size }.unique()[0] ] + ] + } + + // prepare all the necessary files for reference genome + | prepare_genome.run ( + fromState: [ + "fasta": "fasta", + "gtf": "gtf", + "gff": "gff", + "additional_fasta": "additional_fasta", + "transcript_fasta": "transcript_fasta", + "gene_bed": "gene_bed", + "bbsplit_fasta_list": "bbsplit_fasta_list", + "star_index": "star_index", + "rsem_index": "rsem_index", + "salmon_index": "salmon_index", + "kallisto_index": "kallisto_index", + "pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size", + // "splicesites": "splicesites", + // "hisat2_index": "hisat2_index", + "bbsplit_index": "bbsplit_index", + "skip_bbsplit": "skip_bbsplit", + "gencode": "gencode", + "biotype": "biotype", + "filter_gtf": "filter_gtf", + "aligner": "aligner", + "pseudo_aligner": "pseudo_aligner", + "skip_alignment": "skip_alignment", + "star_sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon" + ], + toState: [ + "fasta": "uncompressed_fasta", + "gtf": "gtf_uncompressed", + "transcript_fasta": "transcript_fasta_uncompressed", + "fai": "fai", + "chrom_sizes": "chrom_sizes", + "bbsplit_index": "bbsplit_index_uncompressed", + "star_index": "star_index_uncompressed", + "salmon_index": "salmon_index_uncompressed", + "kallisto_index": "kallisto_index_uncompressed", + "gene_bed": "gene_bed_uncompressed" + ] + ) + + // Check if contigs in genome fasta file > 512 Mbp + | map { id, state -> + (isBelowMaxContigSize(state.fai)) ? [id, state] : [id, state + [bam_csi_index: true]] + } + + // Pick out the state + | map { list -> list[1]} + + analysis_ch = input_ch + + | combine(reference_ch) + + | map { list -> [list[0], list[1] + list[2]] } + + // Concatenate FastQ files from same sample if required + | cat_fastq.run ( + fromState: [ + "read_1": "fastq_1", + "read_2": "fastq_2" + ], + toState: [ + "fastq_1": "fastq_1", + "fastq_2": "fastq_2" + ], + directives: [ label: [ "lowmem", "midcpu" ] ] + ) + + // Pre-process fastq files + | pre_processing.run ( + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "umitools_bc_pattern": "umitools_bc_pattern", + "umitools_bc_pattern2": "umitools_bc_pattern2", + "strandedness": "strandedness", + "transcript_fasta": "transcript_fasta", + "gtf": "gtf", + "with_umi": "with_umi", + "bbsplit_index": "bbsplit_index", + "bbsplit_fasta_list": "bbsplit_fasta_list", + "bc_pattern": "bc_pattern", + "ribo_database_manifest": "ribo_database_manifest", + "salmon_index": "salmon_index", + "skip_qc": "skip_qc", + "skip_fastqc": "skip_fastqc", + "skip_skip_umi_extract": "skip_umi_extract", + "umi_discard_read": "umi_discard_read", + "skip_trimming": "skip_trimming", + "trimmer": "trimmer", + "skip_bbsplit": "skip_bbsplit", + "remove_ribo_rna": "remove_ribo_rna" + ], + toState: [ + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "fastq_1": "qc_output1", + "fastq_2": "qc_output2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "passed_trimmed_reads": "passed_trimmed_reads", + "num_trimmed_reads": "num_trimmed_reads", + "sortmerna_log": "sortmerna_log", + "salmon_quant_output": "salmon_quant_output", + "fastp_failed_trim": "failed_trim", + "fastp_failed_trim_unpaired1": "failed_trim_unpaired1", + "fastp_failed_trim_unpaired2": "failed_trim_unpaired2", + "fastp_trim_json": "trim_json", + "fastp_trim_html": "trim_html", + "fastp_trim_merged_out": "trim_merged_out" + ] + ) + + // Infer strandedness from Salmon pseudo-alignment results + | map { id, state -> + (state.strandedness == 'auto') ? + [ id, state + [strandedness: getSalmonInferredStrandedness(state.salmon_quant_output)] ] : + [id, state] + } + + // Filter FastQ files based on minimum trimmed read count after adapter trimming + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def num_reads = (state.skip_trimming) ? + state.min_trimmed_reads + 1 : + ( + (state.trimmer == "fastp") ? + getFastpReadsAfterFiltering(state.fastp_trim_json) : + ( + (!state.skip_trimming && input.size() == 2) ? + getTrimGaloreReadsAfterFiltering(state.trim_log_2) : + getTrimGaloreReadsAfterFiltering(state.trim_log_1) + ) + ) + def passed_trimmed_reads = + (state.skip_trimming || (num_reads >= state.min_trimmed_reads)) ? + true : + false + [ id, state + [num_trimmed_reads: num_reads, passed_trimmed_reads: passed_trimmed_reads] ] + } + + // Genome alignment and quantification + | genome_alignment_and_quant.run ( + runIf: { id, state -> !state.skip_alignment && state.passed_trimmed_reads }, + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "strandedness": "strandedness", + "gtf": "gtf", + "transcript_fasta": "transcript_fasta", + "bam_csi_index": "bam_csi_index", + "aligner": "aligner", + "rsem_index": "rsem_index", + "star_index": "star_index", + "star_sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon", + "star_ignore_sjdbgtf": "star_ignore_sjdbgtf", + "with_umi": "with_umi", + "umi_dedup_stats": "umi_dedup_stats", + "gtf_group_features": "gtf_group_features", + "gtf_extra_attributes": "gtf_extra_attributes", + "salmon_quant_libtype": "salmon_quant_libtype", + "salmon_index": "salmon_index", + "extra_rsem_calculate_expression_args": "extra_rsem_calculate_expression_args" + ], + toState: [ + "star_multiqc": "star_multiqc", + "rsem_multiqc": "rsem_multiqc", + "salmon_multiqc": "salmon_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "quant_out_dir": "quant_out_dir", + "quant_results_file": "quant_results_file", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "bam_genome_rsem": "bam_genome_rsem", + "bam_transcript_rsem": "bam_transcript_rsem" + ] + ) + + // Filter channels to get samples that passed STAR minimum mapping percentage + | map { id, state -> + def percent_mapped = (!state.skip_alignment && state.passed_trimmed_reads) ? getStarPercentMapped(state.star_multiqc) : 0.0 + def passed_mapping = (percent_mapped >= state.min_mapped_reads) ? true : false + [ id, state + [percent_mapped: percent_mapped, passed_mapping: passed_mapping] ] + } + + // Pseudo-alignment and quantification + | pseudo_alignment_and_quant.run ( + runIf: { id, state -> !state.skip_pseudo_alignment && state.passed_trimmed_reads }, + fromState: [ + "id": "id", + "fastq_1": "fastq_1", + "fastq_2": "fastq_2", + "strandedness": "strandedness", + "gtf": "gtf", + "transcript_fasta": "transcript_fasta", + "pseudo_aligner": "pseudo_aligner", + "salmon_index": "salmon_index", + "kallisto_index": "kallisto_index", + "extra_star_align_args": "extra_star_align_args", + "star_ignore_sjdbgtf": "star_ignore_sjdbgtf", + "seq_platform": "seq_platform", + "seq_center": "seq_center", + "with_umi": "with_umi", + "umi_dedup_stats": "umi_dedup_stats", + "gtf_group_features": "gtf_group_features", + "gtf_extra_attributes": "gtf_extra_attributes", + "lib_type": "salmon_quant_libtype", + "kallisto_quant_fragment_length": "kallisto_quant_fragment_length", + "kallisto_quant_fragment_length_sd": "kallisto_quant_fragment_length_sd" + ], + toState: [ + "pseudo_quant_out_dir": "quant_out_dir", + "pseudo_salmon_quant_results_file": "salmon_quant_results_file", + "pseudo_kallisto_quant_results_file": "kallisto_quant_results_file", + "pseudo_multiqc": "pseudo_multiqc" + ] + ) + + | map { id, state -> + def input = state.fastq_2 ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] + def paired = input.size() == 2 + [ id, state + [ paired: paired ] ] + } + + // Post-processing + | post_processing.run ( + runIf: { id, state -> !state.skip_alignment && state.passed_trimmed_reads && state.passed_mapping }, + fromState: [ + "id": "id", + "paired": "paired", + "strandedness": "strandedness", + "fasta": "fasta", + "fai": "fai", + "gtf": "gtf", + "genome_bam": "genome_bam_sorted", + "chrom_sizes": "chrom_sizes", + "star_multiqc": "star_multiqc", + "extra_picard_args": "extra_picard_args", + "extra_stringtie_args": "extra_stringtie_args", + "stringtie_ignore_gtf": "stringtie_ignore_gtf", + "extra_bedtools_args": "extra_bedtools_args", + "bam_csi_index": "bam_csi_index", + "min_mapped_reads": "min_mapped_reads", + "with_umi": "with_umi", + "skip_qc": "skip_qc", + "skip_markduplicates": "skip_markduplicates", + "skip_stringtie": "skip_stringtie", + "skip_bigwig":"gencode" + ], + toState: [ + "genome_bam_sorted": "processed_genome_bam", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse" + ] + ) + + // Final QC + + // Temporarily skip DESeq2 QC for now + // https://github.com/viash-hub/rnaseq/issues/31 + | map{ id, state -> [ id, state + [ skip_deseq2_qc: true ] ] } + + | quality_control.run ( + fromState: [ + "id": "id", + "paired": "paired", + "strandedness": "strandedness", + "skip_align": "skip_alignment", + "skip_pseudo_align": "skip_pseudo_alignment", + "skip_dupradar": "skip_dupradar", + "skip_qualimap": "skip_qualimap", + "skip_rseqc": "skip_rseqc", + "skip_multiqc": "skip_multiqc", + "skip_preseq": "skip_preseq", + "gtf": "gtf", + "num_trimmed_reads": "num_trimmed_reads", + "passed_trimmed_reads": "passed_trimmed_reads", + "passed_mapping": "passed_mapping", + "percent_mapped": "percent_mapped", + "genome_bam": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "salmon_multiqc": "salmon_multiqc", + "quant_results_file": "quant_results_file", + "rsem_multiqc": "rsem_multiqc", + "rsem_counts_gene": "rsem_counts_gene", + "rsem_counts_transcripts": "rsem_counts_transcripts", + "pseudo_multiqc": "pseudo_multiqc", + "pseudo_quant_out_dir": "pseudo_quant_out_dir", + "pseudo_salmon_quant_results_file": "pseudo_salmon_quant_results_file", + "pseudo_kallisto_quant_results_file": "pseudo_kallisto_quant_results_file", + "aligner": "aligner", + "pseudo_aligner": "pseudo_aligner", + "gene_bed": "gene_bed", + "extra_preseq_args": "extra_preseq_args", + "biotype": "biotype", + "skip_biotype_qc": "skip_biotype_qc", + "featurecounts_group_type": "featurecounts_group_type", + "featurecounts_feature_type": "featurecounts_feature_type", + "gencode": "gencode", + "skip_deseq2_qc": "skip_deseq2_qc", + "deseq2_vst": "deseq2_vst", + "multiqc_custom_config": "multiqc_custom_config", + "multiqc_title": "multiqc_title", + "multiqc_methods_description": "multiqc_methods_description", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "sortmerna_multiqc": "sortmerna_log", + "star_multiqc": "star_multiqc", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "markduplicates_multiqc": "markduplicates_metrics", + "rseqc_modules": "rseqc_modules" + ], + toState: [ + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } + + | setState ( + [ + "output_fasta": "fasta", + "output_gtf": "gtf", + "output_transcript_fasta": "transcript_fasta", + "output_gene_bed": "gene_bed", + "output_bbsplit_index": "bbsplit_index", + "output_star_index": "star_index", + "output_salmon_index": "salmon_index", + "output_kallisto_index": "kallisto_index", + "fastqc_html_1": "fastqc_html_1", + "fastqc_html_2": "fastqc_html_2", + "fastqc_zip_1": "fastqc_zip_1", + "fastqc_zip_2": "fastqc_zip_2", + "output_fastq_1": "fastq_1", + "output_fastq_2": "fastq_2", + "trim_log_1": "trim_log_1", + "trim_log_2": "trim_log_2", + "trim_zip_1": "trim_zip_1", + "trim_zip_2": "trim_zip_2", + "trim_html_1": "trim_html_1", + "trim_html_2": "trim_html_2", + "sortmerna_log": "sortmerna_log", + "star_log": "star_multiqc", + "genome_bam_sorted": "genome_bam_sorted", + "genome_bam_index": "genome_bam_index", + "genome_bam_stats": "genome_bam_stats", + "genome_bam_flagstat": "genome_bam_flagstat", + "genome_bam_idxstats": "genome_bam_idxstats", + "transcriptome_bam": "transcriptome_bam", + "transcriptome_bam_index": "transcriptome_bam_index", + "transcriptome_bam_stats": "transcriptome_bam_stats", + "transcriptome_bam_flagstat": "transcriptome_bam_flagstat", + "transcriptome_bam_idxstats": "transcriptome_bam_idxstats", + "salmon_quant_results": "quant_out_dir", + "pseudo_quant_results": "pseudo_quant_out_dir", + "markduplicates_metrics": "markduplicates_metrics", + "stringtie_transcript_gtf": "stringtie_transcript_gtf", + "stringtie_coverage_gtf": "stringtie_coverage_gtf", + "stringtie_abundance": "stringtie_abundance", + "stringtie_ballgown": "stringtie_ballgown", + "featurecounts": "featurecounts", + "featurecounts_summary": "featurecounts_summary", + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc", + "bedgraph_forward": "bedgraph_forward", + "bedgraph_reverse": "bedgraph_reverse", + "bigwig_forward": "bigwig_forward", + "bigwig_reverse": "bigwig_reverse", + "preseq_output": "preseq_output", + "bamstat_output": "bamstat_output", + "strandedness_output": "strandedness_output", + "inner_dist_output_stats": "inner_dist_output_stats", + "inner_dist_output_dist": "inner_dist_output_dist", + "inner_dist_output_freq": "inner_dist_output_freq", + "inner_dist_output_plot": "inner_dist_output_plot", + "inner_dist_output_plot_r": "inner_dist_output_plot_r", + "junction_annotation_output_log": "junction_annotation_output_log", + "junction_annotation_output_plot_r": "junction_annotation_output_plot_r", + "junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed", + "junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact", + "junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet", + "junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot", + "junction_saturation_output_plot_r": "junction_saturation_output_plot_r", + "junction_saturation_output_plot": "junction_saturation_output_plot", + "read_distribution_output": "read_distribution_output", + "read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence", + "tin_output_summary": "tin_output_summary", + "tin_output_metrics": "tin_output_metrics", + "dupradar_output_dupmatrix": "dupradar_output_dupmatrix", + "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", + "dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot", + "dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot", + "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", + "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", + "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", + "tpm_gene": "tpm_gene", + "counts_gene": "counts_gene", + "counts_gene_length_scaled": "counts_gene_length_scaled", + "counts_gene_scaled": "counts_gene_scaled", + "tpm_transcript": "tpm_transcript", + "counts_transcript": "counts_transcript", + "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment", + "deseq2_output": "deseq2_output", + "pseudo_tpm_gene": "pseudo_tpm_gene", + "pseudo_counts_gene": "pseudo_counts_gene", + "pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled", + "pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled", + "pseudo_tpm_transcript": "pseudo_tpm_transcript", + "pseudo_counts_transcript": "pseudo_counts_transcript", + "pseudo_lengths_gene": "pseudo_lengths_gene", + "pseudo_lengths_transcript": "pseudo_lengths_transcript", + "pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment", + "deseq2_output_pseudo": "deseq2_output_pseudo", + "multiqc_report": "multiqc_report", + "multiqc_data": "multiqc_data", + "multiqc_plots": "multiqc_plots" + ] + ) + + emit: + analysis_ch +} + +import nextflow.Nextflow +// +// Function to generate an error if contigs in genome fasta file > 512 Mbp +// +def isBelowMaxContigSize(fai_file) { + def max_size = 512000000 + fai_file.eachLine { line -> + def lspl = line.split('\t') + def chrom = lspl[0] + def size = lspl[1] + if (size.toInteger() > max_size) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Contig longer than ${max_size}bp found in reference genome!\n\n" + + " ${chrom}: ${size}\n\n" + + " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + return false + } + } + return true +} + +// Find a file in a directory +import java.nio.file.* +def findFile(Path dir, String fileName) { + Files.walk(dir) + .filter { Files.isRegularFile(it) && it.fileName.toString() == fileName } + .findFirst() + .orElse(null) +} + +import groovy.json.JsonSlurper +// +// Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness +// +def getSalmonInferredStrandedness(salmon_quant_output) { + def json_file = findFile(salmon_quant_output, 'meta_info.json') + def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0] + def strandedness = 'reverse' + if (lib_type) { + if (lib_type in ['U', 'IU']) { + strandedness = 'unstranded' + } + else if (lib_type in ['SF', 'ISF']) { + strandedness = 'forward' + } + else if (lib_type in ['SR', 'ISR']) { + strandedness = 'reverse' + } + } + return strandedness +} + +// +// Function that parses TrimGalore log output file to get total number of reads after trimming +// +def getTrimGaloreReadsAfterFiltering(log_file) { + def total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ + def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() + } + return total_reads - filtered_reads +} + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +// +// Function that parses and returns the alignment rate from the STAR log outputs +// +def getStarPercentMapped(align_log) { + def percent_aligned = 0 + def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/ + align_log.eachLine { line -> + def matcher = line =~ pattern + if (matcher) { + percent_aligned = matcher[0][1].toFloat() + } + } + return percent_aligned +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/rnaseq/nextflow.config b/target/nextflow/workflows/rnaseq/nextflow.config new file mode 100644 index 0000000..37c79af --- /dev/null +++ b/target/nextflow/workflows/rnaseq/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'workflows/rnaseq' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.0' + description = 'A viash workflow for the nf-core/rnaseq pipeline.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/workflows/rnaseq/nextflow_labels.config b/target/nextflow/workflows/rnaseq/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/workflows/rnaseq/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/workflows/rnaseq/nextflow_schema.json b/target/nextflow/workflows/rnaseq/nextflow_schema.json new file mode 100644 index 0000000..6276246 --- /dev/null +++ b/target/nextflow/workflows/rnaseq/nextflow_schema.json @@ -0,0 +1,2144 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rnaseq", +"description": "A viash workflow for the nf-core/rnaseq pipeline.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`, required, example: `foo`. ID of the sample", + "help_text": "Type: `string`, required, example: `foo`. ID of the sample." + + } + + + , + "fastq_1": { + "type": + "string", + "description": "Type: List of `file`, required, multiple_sep: `\";\"`. Path to the sample (or read 1 of paired end sample)", + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Path to the sample (or read 1 of paired end sample)." + + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Path to read 2 of the sample", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Path to read 2 of the sample." + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, default: `auto`, choices: ``unstranded`, `forward`, `reverse`, `auto``. Sample strand-specificity", + "help_text": "Type: `string`, default: `auto`, choices: ``unstranded`, `forward`, `reverse`, `auto``. Sample strand-specificity. Must be one of unstranded, forward, reverse or auto", + "enum": ["unstranded", "forward", "reverse", "auto"] + + , + "default":"auto" + } + + +} +}, + + + "reference genome options" : { + "title": "Reference genome options", + "type": "object", + "description": "No description", + "properties": { + + + "fasta": { + "type": + "string", + "description": "Type: `file`, required. Path to FASTA genome file", + "help_text": "Type: `file`, required. Path to FASTA genome file." + + } + + + , + "gtf": { + "type": + "string", + "description": "Type: `file`. Path to GTF annotation file", + "help_text": "Type: `file`. Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified." + + } + + + , + "gff": { + "type": + "string", + "description": "Type: `file`. Path to GFF3 annotation file", + "help_text": "Type: `file`. Path to GFF3 annotation file. Required if \"--gtf\" is not specified." + + } + + + , + "additional_fasta": { + "type": + "string", + "description": "Type: `file`. FASTA file to concatenate to genome FASTA file e", + "help_text": "Type: `file`. FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences." + + } + + + , + "transcript_fasta": { + "type": + "string", + "description": "Type: `file`. Path to FASTA transcriptome file", + "help_text": "Type: `file`. Path to FASTA transcriptome file." + + } + + + , + "gene_bed": { + "type": + "string", + "description": "Type: `file`. Path to BED file containing gene intervals", + "help_text": "Type: `file`. Path to BED file containing gene intervals. This will be created from the GTF file if not specified." + + } + + + , + "splicesites": { + "type": + "string", + "description": "Type: `file`. Splice sites file required for HISAT2", + "help_text": "Type: `file`. Splice sites file required for HISAT2." + + } + + + , + "star_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built STAR index." + + } + + + , + "rsem_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built RSEM index." + + } + + + , + "salmon_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Salmon index." + + } + + + , + "kallisto_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Kallisto index." + + } + + + , + "gencode": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Specify if the GTF annotation is in GENCODE format", + "help_text": "Type: `boolean_true`, default: `false`. Specify if the GTF annotation is in GENCODE format." + , + "default":false + } + + + , + "gtf_extra_attributes": { + "type": + "string", + "description": "Type: `string`, default: `gene_name`. Additional gene identifiers from the input GTF file when running Salmon", + "help_text": "Type: `string`, default: `gene_name`. Additional gene identifiers from the input GTF file when running Salmon. More than one value can be specified separated by comma." + , + "default":"gene_name" + } + + + , + "gtf_group_features": { + "type": + "string", + "description": "Type: `string`, default: `gene_id`. Define the attribute type used to group features in the GTF file when running Salmon", + "help_text": "Type: `string`, default: `gene_id`. Define the attribute type used to group features in the GTF file when running Salmon." + , + "default":"gene_id" + } + + + , + "featurecounts_group_type": { + "type": + "string", + "description": "Type: `string`, default: `gene_biotype`. The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts", + "help_text": "Type: `string`, default: `gene_biotype`. The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts." + , + "default":"gene_biotype" + } + + + , + "featurecounts_feature_type": { + "type": + "string", + "description": "Type: `string`, default: `exon`. By default, the pipeline assigns reads based on the \u0027exon\u0027 attribute within the GTF file", + "help_text": "Type: `string`, default: `exon`. By default, the pipeline assigns reads based on the \u0027exon\u0027 attribute within the GTF file." + , + "default":"exon" + } + + + , + "star_sjdb_gtf_feature_exon": { + "type": + "string", + "description": "Type: `string`. Feature type in GTF file to be used as exons for building transcripts", + "help_text": "Type: `string`. Feature type in GTF file to be used as exons for building transcripts" + + } + + +} +}, + + + "read trimming options" : { + "title": "Read trimming options", + "type": "object", + "description": "No description", + "properties": { + + + "trimmer": { + "type": + "string", + "description": "Type: `string`, default: `trimgalore`, choices: ``trimgalore`, `fastp``. Specify the trimming tool to use", + "help_text": "Type: `string`, default: `trimgalore`, choices: ``trimgalore`, `fastp``. Specify the trimming tool to use.", + "enum": ["trimgalore", "fastp"] + + , + "default":"trimgalore" + } + + + , + "min_trimmed_reads": { + "type": + "integer", + "description": "Type: `integer`, default: `10000`. Minimum number of trimmed reads below which samples are removed from further processing", + "help_text": "Type: `integer`, default: `10000`. Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low." + , + "default":10000 + } + + +} +}, + + + "read filtering options" : { + "title": "Read filtering options", + "type": "object", + "description": "No description", + "properties": { + + + "bbsplit_fasta_list": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. List of reference genomes (separated by \";\") to filter reads against with BBSplit", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. List of reference genomes (separated by \";\") to filter reads against with BBSplit." + + } + + + , + "bbsplit_index": { + "type": + "string", + "description": "Type: `file`. Path to directory or tar", + "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built BBSplit index." + + } + + + , + "remove_ribo_rna": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable the removal of reads derived from ribosomal RNA using SortMeRNA", + "help_text": "Type: `boolean_true`, default: `false`. Enable the removal of reads derived from ribosomal RNA using SortMeRNA." + , + "default":false + } + + + , + "ribo_database_manifest": { + "type": + "string", + "description": "Type: `file`, default: `src/assets/rrna-db-defaults.txt`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA", + "help_text": "Type: `file`, default: `src/assets/rrna-db-defaults.txt`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA." + , + "default":"src/assets/rrna-db-defaults.txt" + } + + +} +}, + + + "umi options" : { + "title": "UMI options", + "type": "object", + "description": "No description", + "properties": { + + + "with_umi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Enable UMI-based read deduplication", + "help_text": "Type: `boolean_true`, default: `false`. Enable UMI-based read deduplication." + , + "default":false + } + + + , + "umitools_extract_method": { + "type": + "string", + "description": "Type: `string`, default: `string`, choices: ``string`, `regex``. UMI pattern to use", + "help_text": "Type: `string`, default: `string`, choices: ``string`, `regex``. UMI pattern to use.", + "enum": ["string", "regex"] + + , + "default":"string" + } + + + , + "umitools_bc_pattern": { + "type": + "string", + "description": "Type: `string`. The UMI barcode pattern to use e", + "help_text": "Type: `string`. The UMI barcode pattern to use e.g. \u0027NNNNNN\u0027 indicates that the first 6 nucleotides of the read are from the UMI." + + } + + + , + "umitools_bc_pattern2": { + "type": + "string", + "description": "Type: `string`. The UMI barcode pattern to use if the UMI is located in read 2", + "help_text": "Type: `string`. The UMI barcode pattern to use if the UMI is located in read 2." + + } + + + , + "umi_discard_read": { + "type": + "integer", + "description": "Type: `integer`, default: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively", + "help_text": "Type: `integer`, default: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.", + "enum": [0, 1, 2] + + , + "default":0 + } + + + , + "umitools_umi_separator": { + "type": + "string", + "description": "Type: `string`, default: `_`. The character that separates the UMI in the read name", + "help_text": "Type: `string`, default: `_`. The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software." + , + "default":"_" + } + + + , + "umitools_grouping_method": { + "type": + "string", + "description": "Type: `string`, default: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs", + "help_text": "Type: `string`, default: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", + "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] + + , + "default":"directional" + } + + + , + "umi_dedup_stats": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate output stats when running \"umi_tools dedup\"", + "help_text": "Type: `boolean_true`, default: `false`. Generate output stats when running \"umi_tools dedup\"." + , + "default":false + } + + +} +}, + + + "alignment options" : { + "title": "Alignment options", + "type": "object", + "description": "No description", + "properties": { + + + "aligner": { + "type": + "string", + "description": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027", + "help_text": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027.", + "enum": ["star_salmon", "star_rsem", "hisat2"] + + , + "default":"star_salmon" + } + + + , + "pseudo_aligner": { + "type": + "string", + "description": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027", + "help_text": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027. Runs in addition to \u0027--aligner\u0027.", + "enum": ["salmon", "kallisto"] + + , + "default":"salmon" + } + + + , + "pseudo_aligner_kmer_size": { + "type": + "integer", + "description": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners", + "help_text": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners." + , + "default":31 + } + + + , + "kallisto_quant_fragment_length": { + "type": + "number", + "description": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto." + + } + + + , + "kallisto_quant_fragment_length_sd": { + "type": + "number", + "description": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto." + + } + + + , + "bam_csi_index": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI index", + "help_text": "Type: `boolean_true`, default: `false`. Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes." + , + "default":false + } + + + , + "salmon_quant_libtype": { + "type": + "string", + "description": "Type: `string`. Override Salmon library type inferred based on strandedness defined in meta object", + "help_text": "Type: `string`. Override Salmon library type inferred based on strandedness defined in meta object." + + } + + + , + "min_mapped_reads": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Minimum percentage of uniquely mapped reads below which samples are removed from further processing", + "help_text": "Type: `integer`, default: `5`. Minimum percentage of uniquely mapped reads below which samples are removed from further processing." + , + "default":5 + } + + + , + "stringtie_ignore_gtf": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Perform reference-guided de novo assembly of transcripts using StringTie, i", + "help_text": "Type: `boolean_true`, default: `false`. Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don\u0027t restrict to those in GTF file." + , + "default":false + } + + + , + "extra_stringtie_args": { + "type": + "string", + "description": "Type: `string`, default: `-v`. Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline", + "help_text": "Type: `string`, default: `-v`. Extra arguments to pass to stringtie command in addition to defaults defined by the pipeline." + , + "default":"-v" + } + + + , + "save_unaligned": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory", + "help_text": "Type: `boolean_true`, default: `false`. Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory." + , + "default":false + } + + + , + "save_align_intermeds": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Save the intermediate BAM files from the alignment step", + "help_text": "Type: `boolean_true`, default: `false`. Save the intermediate BAM files from the alignment step." + , + "default":false + } + + + , + "skip_alignment": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip all of the alignment-based processes within the pipeline", + "help_text": "Type: `boolean_true`, default: `false`. Skip all of the alignment-based processes within the pipeline." + , + "default":false + } + + + , + "skip_pseudo_alignment": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip all of the pseudo-alignment-based processes within the pipeline", + "help_text": "Type: `boolean_true`, default: `false`. Skip all of the pseudo-alignment-based processes within the pipeline." + , + "default":false + } + + +} +}, + + + "process skipping options" : { + "title": "Process skipping options", + "type": "object", + "description": "No description", + "properties": { + + + "skip_fastqc": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Skip FatQC step", + "help_text": "Type: `boolean`, default: `false`. Skip FatQC step." + , + "default":false + } + + + , + "skip_trimming": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Skip the adapter trimming step", + "help_text": "Type: `boolean`, default: `false`. Skip the adapter trimming step." + , + "default":false + } + + + , + "skip_umi_extract": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Skip umi_tools extract step", + "help_text": "Type: `boolean`, default: `false`. Skip umi_tools extract step." + , + "default":false + } + + + , + "skip_qc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip all QC steps except for MultiQC", + "help_text": "Type: `boolean_true`, default: `false`. Skip all QC steps except for MultiQC." + , + "default":false + } + + + , + "skip_markduplicates": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip picard MarkDuplicates step", + "help_text": "Type: `boolean_true`, default: `false`. Skip picard MarkDuplicates step." + , + "default":false + } + + + , + "skip_stringtie": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip StringTie", + "help_text": "Type: `boolean_true`, default: `false`. Skip StringTie." + , + "default":false + } + + + , + "skip_biotype_qc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip additional featureCounts process for biotype QC", + "help_text": "Type: `boolean_true`, default: `false`. Skip additional featureCounts process for biotype QC." + , + "default":false + } + + + , + "skip_bigwig": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip bigWig file creation", + "help_text": "Type: `boolean_true`, default: `false`. Skip bigWig file creation." + , + "default":false + } + + + , + "skip_preseq": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip Preseq", + "help_text": "Type: `boolean_true`, default: `false`. Skip Preseq." + , + "default":false + } + + + , + "skip_dupradar": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip dupRadar", + "help_text": "Type: `boolean_true`, default: `false`. Skip dupRadar." + , + "default":false + } + + + , + "skip_qualimap": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip Qualimap", + "help_text": "Type: `boolean_true`, default: `false`. Skip Qualimap." + , + "default":false + } + + + , + "skip_rseqc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip RSeQC", + "help_text": "Type: `boolean_true`, default: `false`. Skip RSeQC." + , + "default":false + } + + + , + "skip_multiqc": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip MultiQC", + "help_text": "Type: `boolean_true`, default: `false`. Skip MultiQC." + , + "default":false + } + + +} +}, + + + "other process arguments" : { + "title": "Other process arguments", + "type": "object", + "description": "No description", + "properties": { + + + "extra_picard_args": { + "type": + "string", + "description": "Type: `string`, default: ` --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp`. Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline", + "help_text": "Type: `string`, default: ` --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp`. Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline." + , + "default":" --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" + } + + + , + "extra_preseq_args": { + "type": + "string", + "description": "Type: `string`, default: `-verbose -seed 1 -seg_len 100000000`. Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline", + "help_text": "Type: `string`, default: `-verbose -seed 1 -seg_len 100000000`. Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline" + , + "default":"-verbose -seed 1 -seg_len 100000000" + } + + + , + "deseq2_vst": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Use vst transformation instead of rlog with DESeq2", + "help_text": "Type: `boolean`, default: `true`. Use vst transformation instead of rlog with DESeq2" + , + "default":true + } + + + , + "rseqc_modules": { + "type": + "string", + "description": "Type: List of `string`, default: `bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication`, multiple_sep: `\";\"`, choices: ``bam_stat`, `inner_distance`, `infer_experiment`, `junction_annotation`, `junction_saturation`, `read_distribution`, `read_duplication`, `tin``. Specify the RSeQC modules to run_wf (comma-separated list)", + "help_text": "Type: List of `string`, default: `bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication`, multiple_sep: `\";\"`, choices: ``bam_stat`, `inner_distance`, `infer_experiment`, `junction_annotation`, `junction_saturation`, `read_distribution`, `read_duplication`, `tin``. Specify the RSeQC modules to run_wf (comma-separated list)" + , + "default":"bam_stat;inner_distance;infer_experiment;junction_annotation;junction_saturation;read_distribution;read_duplication" + } + + +} +}, + + + "multiqc paramenters" : { + "title": "MultiQC paramenters", + "type": "object", + "description": "No description", + "properties": { + + + "multiqc_custom_config": { + "type": + "string", + "description": "Type: `file`. Custom multiqc configuration file\n", + "help_text": "Type: `file`. Custom multiqc configuration file\n" + + } + + + , + "multiqc_title": { + "type": + "string", + "description": "Type: `string`. Custom multiqc title\n", + "help_text": "Type: `string`. Custom multiqc title\n" + + } + + + , + "multiqc_methods_description": { + "type": + "string", + "description": "Type: `file`. ", + "help_text": "Type: `file`. " + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_fasta": { + "type": + "string", + "description": "Type: `file`, default: `reference/genome.fasta`. ", + "help_text": "Type: `file`, default: `reference/genome.fasta`. " + , + "default":"reference/genome.fasta" + } + + + , + "output_gtf": { + "type": + "string", + "description": "Type: `file`, default: `reference/gene_annotation.gtf`. ", + "help_text": "Type: `file`, default: `reference/gene_annotation.gtf`. " + , + "default":"reference/gene_annotation.gtf" + } + + + , + "output_transcript_fasta": { + "type": + "string", + "description": "Type: `file`, default: `reference/transcriptome.fasta`. ", + "help_text": "Type: `file`, default: `reference/transcriptome.fasta`. " + , + "default":"reference/transcriptome.fasta" + } + + + , + "output_gene_bed": { + "type": + "string", + "description": "Type: `file`, default: `reference/gene_annotation.bed`. ", + "help_text": "Type: `file`, default: `reference/gene_annotation.bed`. " + , + "default":"reference/gene_annotation.bed" + } + + + , + "output_star_index": { + "type": + "string", + "description": "Type: `file`, default: `reference/index/STAR`. Path to STAR index", + "help_text": "Type: `file`, default: `reference/index/STAR`. Path to STAR index." + , + "default":"reference/index/STAR" + } + + + , + "output_salmon_index": { + "type": + "string", + "description": "Type: `file`, default: `reference/index/Salmon`. Path to Salmon index", + "help_text": "Type: `file`, default: `reference/index/Salmon`. Path to Salmon index." + , + "default":"reference/index/Salmon" + } + + + , + "output_bbsplit_index": { + "type": + "string", + "description": "Type: `file`, default: `reference/index/BBSplit`. Path to BBSplit index", + "help_text": "Type: `file`, default: `reference/index/BBSplit`. Path to BBSplit index." + , + "default":"reference/index/BBSplit" + } + + + , + "output_kallisto_index": { + "type": + "string", + "description": "Type: `file`, default: `reference/index/Kallisto`. Path to Kallisto index", + "help_text": "Type: `file`, default: `reference/index/Kallisto`. Path to Kallisto index." + , + "default":"reference/index/Kallisto" + } + + + , + "output_fastq_1": { + "type": + "string", + "description": "Type: `file`, default: `fastq/${id}_r1.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `fastq/${id}_r1.fastq.gz`. Path to output directory" + , + "default":"fastq/${id}_r1.fastq.gz" + } + + + , + "output_fastq_2": { + "type": + "string", + "description": "Type: `file`, default: `fastq/${id}_r2.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `fastq/${id}_r2.fastq.gz`. Path to output directory" + , + "default":"fastq/${id}_r2.fastq.gz" + } + + + , + "fastqc_html_1": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.html`. FastQC HTML report for read 1", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.html`. FastQC HTML report for read 1." + , + "default":"fastqc_raw/${id}_r1.fastqc.html" + } + + + , + "fastqc_html_2": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.html`. FastQC HTML report for read 2", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.html`. FastQC HTML report for read 2." + , + "default":"fastqc_raw/${id}_r2.fastqc.html" + } + + + , + "fastqc_zip_1": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.zip`. FastQC report archive for read 1", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.zip`. FastQC report archive for read 1." + , + "default":"fastqc_raw/${id}_r1.fastqc.zip" + } + + + , + "fastqc_zip_2": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.zip`. FastQC report archive for read 2", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.zip`. FastQC report archive for read 2." + , + "default":"fastqc_raw/${id}_r2.fastqc.zip" + } + + + , + "trim_html_1": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.html`. " + , + "default":"fastqc_trim/${id}_r1.trimmed_fastqc.html" + } + + + , + "trim_html_2": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.html`. " + , + "default":"fastqc_trim/${id}_r2.trimmed_fastqc.html" + } + + + , + "trim_zip_1": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.zip`. " + , + "default":"fastqc_trim/${id}_r1.trimmed_fastqc.zip" + } + + + , + "trim_zip_2": { + "type": + "string", + "description": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.zip`. " + , + "default":"fastqc_trim/${id}_r2.trimmed_fastqc.zip" + } + + + , + "trim_log_1": { + "type": + "string", + "description": "Type: `file`, default: `trimgalore/${id}_r1.trimming_report.txt`. ", + "help_text": "Type: `file`, default: `trimgalore/${id}_r1.trimming_report.txt`. " + , + "default":"trimgalore/${id}_r1.trimming_report.txt" + } + + + , + "trim_log_2": { + "type": + "string", + "description": "Type: `file`, default: `trimgalore/${id}_r2.trimming_report.txt`. ", + "help_text": "Type: `file`, default: `trimgalore/${id}_r2.trimming_report.txt`. " + , + "default":"trimgalore/${id}_r2.trimming_report.txt" + } + + + , + "fastp_trim_json": { + "type": + "string", + "description": "Type: `file`, default: `fastp/$id_out.json`. The fastp json format report file name", + "help_text": "Type: `file`, default: `fastp/$id_out.json`. The fastp json format report file name" + , + "default":"fastp/$id_out.json" + } + + + , + "fastp_trim_html": { + "type": + "string", + "description": "Type: `file`, default: `fastp/$id_out.html`. The fastp html format report file name", + "help_text": "Type: `file`, default: `fastp/$id_out.html`. The fastp html format report file name" + , + "default":"fastp/$id_out.html" + } + + + , + "sortmerna_log": { + "type": + "string", + "description": "Type: `file`, default: `sortmerna/$id.log`. Sortmerna log file", + "help_text": "Type: `file`, default: `sortmerna/$id.log`. Sortmerna log file." + , + "default":"sortmerna/$id.log" + } + + + , + "star_alignment": { + "type": + "string", + "description": "Type: `file`, default: `STAR/$id`. ", + "help_text": "Type: `file`, default: `STAR/$id`. " + , + "default":"STAR/$id" + } + + + , + "genome_bam_sorted": { + "type": + "string", + "description": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam`. ", + "help_text": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam`. " + , + "default":"STAR/genome_processed/$id.genome.bam" + } + + + , + "genome_bam_index": { + "type": + "string", + "description": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam.bai`. ", + "help_text": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam.bai`. " + , + "default":"STAR/genome_processed/$id.genome.bam.bai" + } + + + , + "transcriptome_bam": { + "type": + "string", + "description": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam`. ", + "help_text": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam`. " + , + "default":"STAR/transcriptome_processed/$id.transcriptome.bam" + } + + + , + "transcriptome_bam_index": { + "type": + "string", + "description": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam.bai`. ", + "help_text": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam.bai`. " + , + "default":"STAR/transcriptome_processed/$id.transcriptome.bam.bai" + } + + + , + "star_log": { + "type": + "string", + "description": "Type: `file`, default: `STAR/log/$id.log`. ", + "help_text": "Type: `file`, default: `STAR/log/$id.log`. " + , + "default":"STAR/log/$id.log" + } + + + , + "genome_bam_stats": { + "type": + "string", + "description": "Type: `file`, default: `samtools_stats/$id.genome.stats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.genome.stats`. " + , + "default":"samtools_stats/$id.genome.stats" + } + + + , + "genome_bam_flagstat": { + "type": + "string", + "description": "Type: `file`, default: `samtools_stats/$id.genome.flagstat`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.genome.flagstat`. " + , + "default":"samtools_stats/$id.genome.flagstat" + } + + + , + "genome_bam_idxstats": { + "type": + "string", + "description": "Type: `file`, default: `samtools_stats/$id.genome.idxstats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.genome.idxstats`. " + , + "default":"samtools_stats/$id.genome.idxstats" + } + + + , + "transcriptome_bam_stats": { + "type": + "string", + "description": "Type: `file`, default: `samtools_stats/$id.transcriptome.stats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.transcriptome.stats`. " + , + "default":"samtools_stats/$id.transcriptome.stats" + } + + + , + "transcriptome_bam_flagstat": { + "type": + "string", + "description": "Type: `file`, default: `samtools_stats/$id.transcriptome.flagstat`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.transcriptome.flagstat`. " + , + "default":"samtools_stats/$id.transcriptome.flagstat" + } + + + , + "transcriptome_bam_idxstats": { + "type": + "string", + "description": "Type: `file`, default: `samtools_stats/$id.transcriptome.idxstats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.transcriptome.idxstats`. " + , + "default":"samtools_stats/$id.transcriptome.idxstats" + } + + + , + "salmon_quant_results": { + "type": + "string", + "description": "Type: `file`, default: `STAR_Salmon/$id`. ", + "help_text": "Type: `file`, default: `STAR_Salmon/$id`. " + , + "default":"STAR_Salmon/$id" + } + + + , + "salmon_quant_results_file": { + "type": + "string", + "description": "Type: `file`, default: `STAR_Salmon/$id/quant.sf`. ", + "help_text": "Type: `file`, default: `STAR_Salmon/$id/quant.sf`. " + , + "default":"STAR_Salmon/$id/quant.sf" + } + + + , + "pseudo_quant_results": { + "type": + "string", + "description": "Type: `file`, default: `Pseudo_align_quant/$id`. ", + "help_text": "Type: `file`, default: `Pseudo_align_quant/$id`. " + , + "default":"Pseudo_align_quant/$id" + } + + + , + "rsem_counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `RSEM/$id.genes.results`. Expression counts on gene level", + "help_text": "Type: `file`, default: `RSEM/$id.genes.results`. Expression counts on gene level" + , + "default":"RSEM/$id.genes.results" + } + + + , + "rsem_counts_transcripts": { + "type": + "string", + "description": "Type: `file`, default: `RSEM/$id.isoforms.results`. Expression counts on transcript level", + "help_text": "Type: `file`, default: `RSEM/$id.isoforms.results`. Expression counts on transcript level" + , + "default":"RSEM/$id.isoforms.results" + } + + + , + "bam_star_rsem": { + "type": + "string", + "description": "Type: `file`, default: `RSEM/$id.STAR.genome.bam`. BAM file generated by STAR (from RSEM)", + "help_text": "Type: `file`, default: `RSEM/$id.STAR.genome.bam`. BAM file generated by STAR (from RSEM)" + , + "default":"RSEM/$id.STAR.genome.bam" + } + + + , + "bam_genome_rsem": { + "type": + "string", + "description": "Type: `file`, default: `RSEM/$id.genome.bam`. Genome BAM file (from RSEM)", + "help_text": "Type: `file`, default: `RSEM/$id.genome.bam`. Genome BAM file (from RSEM)" + , + "default":"RSEM/$id.genome.bam" + } + + + , + "bam_transcript_rsem": { + "type": + "string", + "description": "Type: `file`, default: `RSEM/$id.transcript.bam`. Transcript BAM file (from RSEM)", + "help_text": "Type: `file`, default: `RSEM/$id.transcript.bam`. Transcript BAM file (from RSEM)" + , + "default":"RSEM/$id.transcript.bam" + } + + + , + "tpm_gene": { + "type": + "string", + "description": "Type: `file`, default: `transcript_quantification/gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_tpm.tsv`. " + , + "default":"transcript_quantification/gene_tpm.tsv" + } + + + , + "counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `transcript_quantification/gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_counts.tsv`. " + , + "default":"transcript_quantification/gene_counts.tsv" + } + + + , + "counts_gene_length_scaled": { + "type": + "string", + "description": "Type: `file`, default: `transcript_quantification/gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_counts_length_scaled.tsv`. " + , + "default":"transcript_quantification/gene_counts_length_scaled.tsv" + } + + + , + "counts_gene_scaled": { + "type": + "string", + "description": "Type: `file`, default: `transcript_quantification/gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_counts_scaled.tsv`. " + , + "default":"transcript_quantification/gene_counts_scaled.tsv" + } + + + , + "tpm_transcript": { + "type": + "string", + "description": "Type: `file`, default: `transcript_quantification/transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/transcript_tpm.tsv`. " + , + "default":"transcript_quantification/transcript_tpm.tsv" + } + + + , + "counts_transcript": { + "type": + "string", + "description": "Type: `file`, default: `transcript_quantification/transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/transcript_counts.tsv`. " + , + "default":"transcript_quantification/transcript_counts.tsv" + } + + + , + "quant_merged_summarizedexperiment": { + "type": + "string", + "description": "Type: `file`, default: `transcript_quantification/summarizedexperiment`. ", + "help_text": "Type: `file`, default: `transcript_quantification/summarizedexperiment`. " + , + "default":"transcript_quantification/summarizedexperiment" + } + + + , + "markduplicates_metrics": { + "type": + "string", + "description": "Type: `file`, default: `picard/$id.MarkDuplicates.metrics.txt`. ", + "help_text": "Type: `file`, default: `picard/$id.MarkDuplicates.metrics.txt`. " + , + "default":"picard/$id.MarkDuplicates.metrics.txt" + } + + + , + "stringtie_transcript_gtf": { + "type": + "string", + "description": "Type: `file`, default: `stringtie/$id.transcripts.gtf`. ", + "help_text": "Type: `file`, default: `stringtie/$id.transcripts.gtf`. " + , + "default":"stringtie/$id.transcripts.gtf" + } + + + , + "stringtie_coverage_gtf": { + "type": + "string", + "description": "Type: `file`, default: `stringtie/$id.coverage.gtf`. ", + "help_text": "Type: `file`, default: `stringtie/$id.coverage.gtf`. " + , + "default":"stringtie/$id.coverage.gtf" + } + + + , + "stringtie_abundance": { + "type": + "string", + "description": "Type: `file`, default: `stringtie/$id.gene_abundance.txt`. ", + "help_text": "Type: `file`, default: `stringtie/$id.gene_abundance.txt`. " + , + "default":"stringtie/$id.gene_abundance.txt" + } + + + , + "stringtie_ballgown": { + "type": + "string", + "description": "Type: `file`, default: `stringtie/$id.ballgown`. ", + "help_text": "Type: `file`, default: `stringtie/$id.ballgown`. " + , + "default":"stringtie/$id.ballgown" + } + + + , + "featurecounts": { + "type": + "string", + "description": "Type: `file`, default: `featurecounts/$id.featureCounts.txt`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts.txt`. " + , + "default":"featurecounts/$id.featureCounts.txt" + } + + + , + "featurecounts_summary": { + "type": + "string", + "description": "Type: `file`, default: `featurecounts/$id.featureCounts.txt.summary`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts.txt.summary`. " + , + "default":"featurecounts/$id.featureCounts.txt.summary" + } + + + , + "featurecounts_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `featurecounts/$id.featureCounts_mqc.tsv`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts_mqc.tsv`. " + , + "default":"featurecounts/$id.featureCounts_mqc.tsv" + } + + + , + "featurecounts_rrna_multiqc": { + "type": + "string", + "description": "Type: `file`, default: `featurecounts/$id.featureCounts_rrna_mqc.tsv`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts_rrna_mqc.tsv`. " + , + "default":"featurecounts/$id.featureCounts_rrna_mqc.tsv" + } + + + , + "bedgraph_forward": { + "type": + "string", + "description": "Type: `file`, default: `bedgraph/$id.forward.bedgraph`. ", + "help_text": "Type: `file`, default: `bedgraph/$id.forward.bedgraph`. " + , + "default":"bedgraph/$id.forward.bedgraph" + } + + + , + "bedgraph_reverse": { + "type": + "string", + "description": "Type: `file`, default: `bedgraph/$id.reverse.bedgraph`. ", + "help_text": "Type: `file`, default: `bedgraph/$id.reverse.bedgraph`. " + , + "default":"bedgraph/$id.reverse.bedgraph" + } + + + , + "bigwig_forward": { + "type": + "string", + "description": "Type: `file`, default: `bigwig/$id.forward.bigwig`. ", + "help_text": "Type: `file`, default: `bigwig/$id.forward.bigwig`. " + , + "default":"bigwig/$id.forward.bigwig" + } + + + , + "bigwig_reverse": { + "type": + "string", + "description": "Type: `file`, default: `bigwig/$id.reverse.bigwig`. ", + "help_text": "Type: `file`, default: `bigwig/$id.reverse.bigwig`. " + , + "default":"bigwig/$id.reverse.bigwig" + } + + + , + "preseq_output": { + "type": + "string", + "description": "Type: `file`, default: `preseq/$id.lc_extrap.txt`. ", + "help_text": "Type: `file`, default: `preseq/$id.lc_extrap.txt`. " + , + "default":"preseq/$id.lc_extrap.txt" + } + + + , + "bamstat_output": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/bamstat/$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics", + "help_text": "Type: `file`, default: `RSeQC/bamstat/$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics" + , + "default":"RSeQC/bamstat/$id.mapping_quality.txt" + } + + + , + "strandedness_output": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/inferexperiment/$id.strandedness.txt`. Path to output report (txt) of inferred strandedness", + "help_text": "Type: `file`, default: `RSeQC/inferexperiment/$id.strandedness.txt`. Path to output report (txt) of inferred strandedness" + , + "default":"RSeQC/inferexperiment/$id.strandedness.txt" + } + + + , + "inner_dist_output_stats": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/innerdistance/$id.inner_distance.stats`. output file (txt) with summary statistics of inner distances of paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/$id.inner_distance.stats`. output file (txt) with summary statistics of inner distances of paired reads" + , + "default":"RSeQC/innerdistance/$id.inner_distance.stats" + } + + + , + "inner_dist_output_dist": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads" + , + "default":"RSeQC/innerdistance/txt/$id.inner_distance.txt" + } + + + , + "inner_dist_output_freq": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads" + , + "default":"RSeQC/innerdistance/txt/$id.inner_distance_freq.txt" + } + + + , + "inner_dist_output_plot": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads" + , + "default":"RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf" + } + + + , + "inner_dist_output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/innerdistance/rscript/$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/rscript/$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads" + , + "default":"RSeQC/innerdistance/rscript/$id.inner_distance_plot.r" + } + + + , + "junction_annotation_output_log": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionannotation/log/$id.junction_annotation.log`. output log of junction annotation script", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/log/$id.junction_annotation.log`. output log of junction annotation script" + , + "default":"RSeQC/junctionannotation/log/$id.junction_annotation.log" + } + + + , + "junction_annotation_output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot" + , + "default":"RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r" + } + + + , + "junction_annotation_output_junction_bed": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.bed`. junction annotation file (bed format)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.bed`. junction annotation file (bed format)" + , + "default":"RSeQC/junctionannotation/bed/$id.junction_annotation.bed" + } + + + , + "junction_annotation_output_junction_interact": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." + , + "default":"RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed" + } + + + , + "junction_annotation_output_junction_sheet": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionannotation/xls/$id.junction_annotation.xls`. junction annotation file (xls format)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/xls/$id.junction_annotation.xls`. junction annotation file (xls format)" + , + "default":"RSeQC/junctionannotation/xls/$id.junction_annotation.xls" + } + + + , + "junction_annotation_output_splice_events_plot": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_events.pdf`. plot of splice events (pdf)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_events.pdf`. plot of splice events (pdf)" + , + "default":"RSeQC/junctionannotation/pdf/$id.splice_events.pdf" + } + + + , + "junction_annotation_output_splice_junctions_plot": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf`. plot of junctions (pdf)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf`. plot of junctions (pdf)" + , + "default":"RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf" + } + + + , + "junction_saturation_output_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot", + "help_text": "Type: `file`, default: `RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot" + , + "default":"RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r" + } + + + , + "junction_saturation_output_plot": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf", + "help_text": "Type: `file`, default: `RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf" + , + "default":"RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf" + } + + + , + "read_distribution_output": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/readdistribution/$id.read_distribution.txt`. output file (txt) of read distribution analysis", + "help_text": "Type: `file`, default: `RSeQC/readdistribution/$id.read_distribution.txt`. output file (txt) of read distribution analysis." + , + "default":"RSeQC/readdistribution/$id.read_distribution.txt" + } + + + , + "read_duplication_output_duplication_rate_plot_r": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r`. R script for generating duplication rate plot", + "help_text": "Type: `file`, default: `RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r`. R script for generating duplication rate plot" + , + "default":"RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r" + } + + + , + "read_duplication_output_duplication_rate_plot": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)", + "help_text": "Type: `file`, default: `RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)" + , + "default":"RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf" + } + + + , + "read_duplication_output_duplication_rate_mapping": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication", + "help_text": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication" + , + "default":"RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls" + } + + + , + "read_duplication_output_duplication_rate_sequence": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication", + "help_text": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication" + , + "default":"RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls" + } + + + , + "tin_output_summary": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/tin/txt/$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics", + "help_text": "Type: `file`, default: `RSeQC/tin/txt/$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics" + , + "default":"RSeQC/tin/txt/$id.tin_summary.txt" + } + + + , + "tin_output_metrics": { + "type": + "string", + "description": "Type: `file`, default: `RSeQC/tin/xls/$id.tin.xls`. file with TIN metrics (xls)", + "help_text": "Type: `file`, default: `RSeQC/tin/xls/$id.tin.xls`. file with TIN metrics (xls)" + , + "default":"RSeQC/tin/xls/$id.tin.xls" + } + + + , + "dupradar_output_dupmatrix": { + "type": + "string", + "description": "Type: `file`, default: `dupradar/gene_data/$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts", + "help_text": "Type: `file`, default: `dupradar/gene_data/$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts" + , + "default":"dupradar/gene_data/$id.dup_matrix.txt" + } + + + , + "dupradar_output_dup_intercept_mqc": { + "type": + "string", + "description": "Type: `file`, default: `dupradar/mqc_intercept/$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", + "help_text": "Type: `file`, default: `dupradar/mqc_intercept/$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" + , + "default":"dupradar/mqc_intercept/$id.dup_intercept_mqc.txt" + } + + + , + "dupradar_output_duprate_exp_boxplot": { + "type": + "string", + "description": "Type: `file`, default: `dupradar/box_plot/$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", + "help_text": "Type: `file`, default: `dupradar/box_plot/$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot" + , + "default":"dupradar/box_plot/$id.duprate_exp_boxplot.pdf" + } + + + , + "dupradar_output_duprate_exp_densplot": { + "type": + "string", + "description": "Type: `file`, default: `dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "help_text": "Type: `file`, default: `dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" + , + "default":"dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf" + } + + + , + "dupradar_output_duprate_exp_denscurve_mqc": { + "type": + "string", + "description": "Type: `file`, default: `dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc", + "help_text": "Type: `file`, default: `dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc" + , + "default":"dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf" + } + + + , + "dupradar_output_expression_histogram": { + "type": + "string", + "description": "Type: `file`, default: `dupradar/histogram/$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", + "help_text": "Type: `file`, default: `dupradar/histogram/$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" + , + "default":"dupradar/histogram/$id.expression_hist.pdf" + } + + + , + "dupradar_output_intercept_slope": { + "type": + "string", + "description": "Type: `file`, default: `dupradar/intercept_slope/$id.intercept_slope.txt`. ", + "help_text": "Type: `file`, default: `dupradar/intercept_slope/$id.intercept_slope.txt`. " + , + "default":"dupradar/intercept_slope/$id.intercept_slope.txt" + } + + + , + "qualimap_qc_report": { + "type": + "string", + "description": "Type: `file`, default: `Qualimap/$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results", + "help_text": "Type: `file`, default: `Qualimap/$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results." + , + "default":"Qualimap/$id.rnaseq_qc_results.txt" + } + + + , + "qualimap_counts": { + "type": + "string", + "description": "Type: `file`, default: `Qualimap/$id.counts.txt`. Output file for computed counts", + "help_text": "Type: `file`, default: `Qualimap/$id.counts.txt`. Output file for computed counts." + , + "default":"Qualimap/$id.counts.txt" + } + + + , + "qualimap_report": { + "type": + "string", + "description": "Type: `file`, default: `Qualimap/$id.report.html`. Report output file", + "help_text": "Type: `file`, default: `Qualimap/$id.report.html`. Report output file. Supported formats are PDF or HTML." + , + "default":"Qualimap/$id.report.html" + } + + + , + "deseq2_output": { + "type": + "string", + "description": "Type: `file`, default: `deseq2_qc`. ", + "help_text": "Type: `file`, default: `deseq2_qc`. " + , + "default":"deseq2_qc" + } + + + , + "deseq2_output_pseudo": { + "type": + "string", + "description": "Type: `file`, default: `deseq2_qc_pseudo`. ", + "help_text": "Type: `file`, default: `deseq2_qc_pseudo`. " + , + "default":"deseq2_qc_pseudo" + } + + + , + "multiqc_report": { + "type": + "string", + "description": "Type: `file`, default: `multiqc/multiqc_report.html`. ", + "help_text": "Type: `file`, default: `multiqc/multiqc_report.html`. " + , + "default":"multiqc/multiqc_report.html" + } + + + , + "multiqc_data": { + "type": + "string", + "description": "Type: `file`, default: `multiqc/multiqc_data`. ", + "help_text": "Type: `file`, default: `multiqc/multiqc_data`. " + , + "default":"multiqc/multiqc_data" + } + + + , + "multiqc_plots": { + "type": + "string", + "description": "Type: `file`, default: `multiqc/multiqc_plots`. ", + "help_text": "Type: `file`, default: `multiqc/multiqc_plots`. " + , + "default":"multiqc/multiqc_plots" + } + + + , + "multiqc_versions": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.multiqc_versions`. ", + "help_text": "Type: `file`, default: `$id.$key.multiqc_versions`. " + , + "default":"$id.$key.multiqc_versions" + } + + + , + "pseudo_counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts.tsv`. " + , + "default":"pseudo_alignment_quantification/gene_counts.tsv" + } + + + , + "pseudo_counts_gene_length_scaled": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_length_scaled.tsv`. " + , + "default":"pseudo_alignment_quantification/gene_counts_length_scaled.tsv" + } + + + , + "pseudo_counts_gene_scaled": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_scaled.tsv`. " + , + "default":"pseudo_alignment_quantification/gene_counts_scaled.tsv" + } + + + , + "pseudo_tpm_gene": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_tpm.tsv`. " + , + "default":"pseudo_alignment_quantification/gene_tpm.tsv" + } + + + , + "pseudo_tpm_transcript": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_alignment_quantification/transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/transcript_tpm.tsv`. " + , + "default":"pseudo_alignment_quantification/transcript_tpm.tsv" + } + + + , + "pseudo_counts_transcript": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_alignment_quantification/transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/transcript_counts.tsv`. " + , + "default":"pseudo_alignment_quantification/transcript_counts.tsv" + } + + + , + "pseudo_quant_merged_summarizedexperiment": { + "type": + "string", + "description": "Type: `file`, default: `pseudo_alignment_quantification/quant_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/quant_merged_summarizedexperiment`. " + , + "default":"pseudo_alignment_quantification/quant_merged_summarizedexperiment" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/reference genome options" + }, + + { + "$ref": "#/definitions/read trimming options" + }, + + { + "$ref": "#/definitions/read filtering options" + }, + + { + "$ref": "#/definitions/umi options" + }, + + { + "$ref": "#/definitions/alignment options" + }, + + { + "$ref": "#/definitions/process skipping options" + }, + + { + "$ref": "#/definitions/other process arguments" + }, + + { + "$ref": "#/definitions/multiqc paramenters" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +}